/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2017 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"  /* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"

/* Pattern recognition functions  */
static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
					     tree *);
static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
					      tree *);
static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
					    tree *);
static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
				       tree *);
static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
						 tree *);
static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
					       tree *, tree *);
static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
						       tree *, tree *);
static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
					  tree *, tree *);

static gimple *vect_recog_mult_pattern (vec<gimple *> *,
					tree *, tree *);

static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
						   tree *, tree *);
static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *,
						   tree *);

struct vect_recog_func
{
  vect_recog_func_ptr fn;
  const char *name;
};

/* Note that ordering matters - the first pattern matching on a stmt is
   taken, which means that usually the more complex pattern needs to
   precede the less complex ones (widen_sum only after dot_prod or sad,
   for example).  */
static vect_recog_func vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
  { vect_recog_widen_mult_pattern, "widen_mult" },
  { vect_recog_dot_prod_pattern, "dot_prod" },
  { vect_recog_sad_pattern, "sad" },
  { vect_recog_widen_sum_pattern, "widen_sum" },
  { vect_recog_pow_pattern, "pow" },
  { vect_recog_widen_shift_pattern, "widen_shift" },
  { vect_recog_over_widening_pattern, "over_widening" },
  { vect_recog_rotate_pattern, "rotate" },
  { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
  { vect_recog_divmod_pattern, "divmod" },
  { vect_recog_mult_pattern, "mult" },
  { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
  { vect_recog_bool_pattern, "bool" },
  { vect_recog_mask_conversion_pattern, "mask_conversion" }
};

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}

/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
}

/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple *
vect_single_imm_use (gimple *def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple *use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}

/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  gimple *dummy_gimple;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (dt == vect_internal_def)
    {
      stmt_vec_info def_vinfo = vinfo_for_stmt (*def_stmt);
      if (STMT_VINFO_IN_PATTERN_P (def_vinfo))
	return false;
    }

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
    return false;

  return true;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

static gimple *
vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size of DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple *def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
	  && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
	return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	= STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple *def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
	return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  return pattern_stmt;
}
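
/* An illustrative source-level loop of the shape this pattern targets (a
   sketch for exposition; the array names and the short/int type choice are
   assumptions, not taken from these sources):

     short a[N], b[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += a[i] * b[i];   // S3/S4 widen, S5 multiply, S7 accumulate

   After recognition, each vector iteration computes a single
   DOT_PROD_EXPR <a_chunk, b_chunk, sum_0> instead of the
   widen/multiply/add chain.  */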


/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */

static gimple *
vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DDAD = (TYPE2) DAD;
          sum_1 = DDAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size of DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple *def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
	  && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
	return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
	  || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
        return NULL;

      /* The type conversion could be promotion, demotion,
         or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
                             &half_type, &def_stmt, &promotion))
        plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
        half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It could not be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It could not be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple *def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
                          &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
                          &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
					      sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  return pattern_stmt;
}
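
/* An illustrative loop for the SAD shape above (a sketch; the unsigned char
   elements and int accumulator are assumptions):

     unsigned char x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += abs ((int) x[i] - (int) y[i]);  // S5 subtract, S6 abs, S8 add

   which pattern recognition collapses into one
   SAD_EXPR <x_chunk, y_chunk, sum_0> per vector iteration.  */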


/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  Store such operation in *WSTMT.  */

static bool
vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       gimple **wstmt, tree type,
			       tree *half_type, gimple *def_stmt)
{
  tree new_type, new_oprnd;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
        || (code == LSHIFT_EXPR
            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	       != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t;  */
  *oprnd = gimple_assign_rhs1 (def_stmt);
  new_oprnd = make_ssa_name (new_type);
  *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
  *oprnd = new_oprnd;

  *half_type = new_type;
  return true;
}
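
/* For example (a sketch; the concrete types are assumptions): with
   HALF_TYPE 'unsigned char' and TYPE 'unsigned int',

     res = (unsigned int) c * 300;

   cannot use a char->int widen-mult because 300 does not fit 'unsigned
   char', but TYPE is 4x HALF_TYPE and 300 fits the 16-bit intermediate
   type, so the code above emits *WSTMT as c_it = (unsigned short) c and
   lets the caller widen-multiply from the intermediate type.  */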


/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.
*/

static gimple *
vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
			       tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  gimple *def_stmt0, *def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple *new_stmt = NULL, *pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
     return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
                              &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
					    &oprnd0, &new_stmt, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
        return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      /* If we already used up the single-stmt slot give up.  */
      if (new_stmt)
	return NULL;

      tree* oprnd = NULL;
      gimple *def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0);
      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple *use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
	       TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
        = new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
        = new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
					  NOP_EXPR,
					  gimple_assign_lhs (pattern_stmt));
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
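
/* The basic shape at source level (a sketch; the types are assumptions):

     short a, b;
     int prod = (int) a * (int) b;   // S3/S4 promote, S5 multiply

   is replaced by a single WIDEN_MULT_EXPR <a, b> that produces the int
   product directly from the two short inputs.  */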


/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
	x = sqrt (x)
*/

static gimple *
vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree base, exp = NULL;
  gimple *stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  switch (gimple_call_combined_fn (last_stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in
	  && direct_internal_fn_supported_p (IFN_SQRT, *type_in,
					     OPTIMIZE_FOR_SPEED))
	{
	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	  gimple_call_set_lhs (stmt, var);
	  return stmt;
	}
    }

  return NULL;
}
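
/* Both replacements at source level (a sketch):

     double t = pow (y, 2.0);   // becomes  t = y * y
     double r = pow (y, 0.5);   // becomes  r = sqrt (y), when the target
                                // supports a vector IFN_SQRT
*/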


/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e - we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This is
   a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */

static gimple *
vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
      && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
                          &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  return pattern_stmt;
}
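
/* An illustrative reduction for this pattern (a sketch; the char/int
   choice is an assumption):

     unsigned char x[N];
     unsigned int sum = 0;
     for (int i = 0; i < N; i++)
       sum += x[i];   // S2 promotes x[i], S3 accumulates

   which is rewritten to WIDEN_SUM_EXPR <x_chunk, sum_0>.  */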


/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be created
         in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
	 the second pattern statement.  */

static bool
vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple **new_def_stmt,
				  vec<gimple *> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple *def_stmt, *new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
           (e.g., if the original value was char, the shift amount is at most 8
           if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

	      stmts->safe_push (def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type);
	      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              stmts->safe_push (def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type);
	  new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}


/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */

static gimple *
vect_recog_over_widening_pattern (vec<gimple *> *stmts,
                                  tree *type_in, tree *type_out)
{
  gimple *stmt = stmts->pop ();
  gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
	 *use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "created pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type);
	  pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            stmts->safe_push (prev_stmt);
        }
      else
        {
          if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	       = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  return pattern_stmt;
}
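
/* An illustrative over-widened sequence (a sketch; the types are
   assumptions):

     unsigned char c, r;
     int t1 = (int) c << 3;       // S2/S3: over-promoted shift
     int t2 = t1 | 5;             // S4
     r = (unsigned char) t2;      // S5 demotes the result

   Both S3 and S4 fit the 16-bit intermediate type, so the pattern
   statements perform the shift and the ior on unsigned short instead of
   int, followed by a short->char conversion.  */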
1581
36ba4aae
IR
1582/* Detect widening shift pattern:
1583
1584 type a_t;
1585 TYPE a_T, res_T;
1586
1587 S1 a_t = ;
1588 S2 a_T = (TYPE) a_t;
1589 S3 res_T = a_T << CONST;
1590
1591 where type 'TYPE' is at least double the size of type 'type'.
1592
33018845
UW
1593 Also detect cases where the shift result is immediately converted
1594 to another type 'result_type' that is no larger in size than 'TYPE'.
1595 In those cases we perform a widen-shift that directly results in
1596 'result_type', to avoid a possible over-widening situation:
36ba4aae 1597
33018845 1598 type a_t;
36ba4aae 1599 TYPE a_T, res_T;
33018845 1600 result_type res_result;
36ba4aae
IR
1601
1602 S1 a_t = ;
1603 S2 a_T = (TYPE) a_t;
1604 S3 res_T = a_T << CONST;
33018845
UW
1605 S4 res_result = (result_type) res_T;
1606 '--> res_result' = a_t w<< CONST;
36ba4aae
IR
1607
 1608 Also handle the case when 'TYPE' is 4 times bigger than 'type'. In that case we
1609 create an additional pattern stmt for S2 to create a variable of an
1610 intermediate type, and perform widen-shift on the intermediate type:
1611
1612 type a_t;
1613 interm_type a_it;
1614 TYPE a_T, res_T, res_T';
1615
1616 S1 a_t = ;
1617 S2 a_T = (TYPE) a_t;
1618 '--> a_it = (interm_type) a_t;
1619 S3 res_T = a_T << CONST;
1620 '--> res_T' = a_it <<* CONST;
1621
1622 Input/Output:
1623
1624 * STMTS: Contains a stmt from which the pattern search begins.
1625 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1626 in STMTS. When an intermediate type is used and a pattern statement is
1627 created for S2, we also put S2 here (before S3).
1628
1629 Output:
1630
1631 * TYPE_IN: The type of the input arguments to the pattern.
1632
1633 * TYPE_OUT: The type of the output of this pattern.
1634
1635 * Return value: A new stmt that will be used to replace the sequence of
1636 stmts that constitute the pattern. In this case it will be:
1637 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1638
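/* Illustrative example (added for exposition, not part of the original
   source): with 'type' = unsigned short and 'TYPE' = unsigned int, a
   loop such as

     unsigned short a[N];
     unsigned int res[N];
     for (int i = 0; i < N; i++)
       res[i] = (unsigned int) a[i] << 3;

   matches S1-S3 above and, assuming the target supports
   WIDEN_LSHIFT_EXPR for these modes, is vectorized with a single
   widening shift instead of a widening conversion followed by a
   shift.  */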
355fe088
TS
1639static gimple *
1640vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
36ba4aae
IR
1641 tree *type_in, tree *type_out)
1642{
355fe088
TS
1643 gimple *last_stmt = stmts->pop ();
1644 gimple *def_stmt0;
36ba4aae
IR
1645 tree oprnd0, oprnd1;
1646 tree type, half_type0;
355fe088 1647 gimple *pattern_stmt;
36ba4aae 1648 tree vectype, vectype_out = NULL_TREE;
36ba4aae
IR
1649 tree var;
1650 enum tree_code dummy_code;
1651 int dummy_int;
9771b263 1652 vec<tree> dummy_vec;
355fe088 1653 gimple *use_stmt;
bc4fb355 1654 bool promotion;
36ba4aae
IR
1655
1656 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1657 return NULL;
1658
36ba4aae 1659 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
33018845 1660 return NULL;
36ba4aae
IR
1661
1662 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1663 return NULL;
1664
1665 oprnd0 = gimple_assign_rhs1 (last_stmt);
1666 oprnd1 = gimple_assign_rhs2 (last_stmt);
1667 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1668 return NULL;
1669
1670 /* Check operand 0: it has to be defined by a type promotion. */
bc4fb355 1671 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
566d377a 1672 &promotion)
bc4fb355
IR
1673 || !promotion)
1674 return NULL;
36ba4aae
IR
1675
1676 /* Check operand 1: has to be positive. We check that it fits the type
1677 in vect_handle_widen_op_by_const (). */
1678 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1679 return NULL;
1680
1681 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1682 type = gimple_expr_type (last_stmt);
1683
33018845
UW
1684 /* Check for subsequent conversion to another type. */
1685 use_stmt = vect_single_imm_use (last_stmt);
1686 if (use_stmt && is_gimple_assign (use_stmt)
1687 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1688 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1689 {
1690 tree use_lhs = gimple_assign_lhs (use_stmt);
1691 tree use_type = TREE_TYPE (use_lhs);
1692
1693 if (INTEGRAL_TYPE_P (use_type)
1694 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1695 {
1696 last_stmt = use_stmt;
1697 type = use_type;
1698 }
1699 }
1700
36ba4aae 1701 /* Check if this is a widening operation. */
355fe088 1702 gimple *wstmt = NULL;
36ba4aae 1703 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
566d377a 1704 &oprnd0, &wstmt,
36ba4aae
IR
1705 type, &half_type0, def_stmt0))
1706 return NULL;
1707
36ba4aae 1708 /* Pattern detected. */
73fbfcad 1709 if (dump_enabled_p ())
ccb3ad87 1710 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1711 "vect_recog_widen_shift_pattern: detected:\n");
36ba4aae
IR
1712
1713 /* Check target support. */
1714 vectype = get_vectype_for_scalar_type (half_type0);
1715 vectype_out = get_vectype_for_scalar_type (type);
1716
1717 if (!vectype
1718 || !vectype_out
1719 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1720 vectype_out, vectype,
a86ec597
RH
1721 &dummy_code, &dummy_code,
1722 &dummy_int, &dummy_vec))
36ba4aae
IR
1723 return NULL;
1724
1725 *type_in = vectype;
1726 *type_out = vectype_out;
1727
1728 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1729 var = vect_recog_temp_ssa_var (type, NULL);
1730 pattern_stmt =
0d0e4a03 1731 gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
566d377a
RB
1732 if (wstmt)
1733 {
1734 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
566d377a
RB
1735 new_pattern_def_seq (stmt_vinfo, wstmt);
1736 stmt_vec_info new_stmt_info
310213d4 1737 = new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
566d377a
RB
1738 set_vinfo_for_stmt (wstmt, new_stmt_info);
1739 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1740 }
36ba4aae 1741
73fbfcad 1742 if (dump_enabled_p ())
78c60e3d 1743 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
7e9a3abb
JJ
1744
1745 stmts->safe_push (last_stmt);
1746 return pattern_stmt;
1747}
1748
 1749/* Detect a rotate pattern that wouldn't be otherwise vectorized:
1750
1751 type a_t, b_t, c_t;
1752
1753 S0 a_t = b_t r<< c_t;
1754
1755 Input/Output:
1756
1757 * STMTS: Contains a stmt from which the pattern search begins,
1758 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1759 with a sequence:
1760
1761 S1 d_t = -c_t;
1762 S2 e_t = d_t & (B - 1);
1763 S3 f_t = b_t << c_t;
1764 S4 g_t = b_t >> e_t;
1765 S0 a_t = f_t | g_t;
1766
1767 where B is element bitsize of type.
1768
1769 Output:
1770
1771 * TYPE_IN: The type of the input arguments to the pattern.
1772
1773 * TYPE_OUT: The type of the output of this pattern.
1774
1775 * Return value: A new stmt that will be used to replace the rotate
1776 S0 stmt. */
1777
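/* Illustrative example (added for exposition): for a 32-bit unsigned
   type, B = 32, so

     a_t = b_t r<< c_t;

   is expanded as

     d_t = -c_t;
     e_t = d_t & 31;
     f_t = b_t << c_t;
     g_t = b_t >> e_t;
     a_t = f_t | g_t;

   Masking with B - 1 keeps both shift amounts in range, so the
   expansion is well defined even for c_t == 0.  */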
355fe088
TS
1778static gimple *
1779vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
7e9a3abb 1780{
355fe088 1781 gimple *last_stmt = stmts->pop ();
7e9a3abb 1782 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
355fe088 1783 gimple *pattern_stmt, *def_stmt;
7e9a3abb
JJ
1784 enum tree_code rhs_code;
1785 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
310213d4 1786 vec_info *vinfo = stmt_vinfo->vinfo;
7e9a3abb
JJ
1787 enum vect_def_type dt;
1788 optab optab1, optab2;
68119618 1789 edge ext_def = NULL;
7e9a3abb
JJ
1790
1791 if (!is_gimple_assign (last_stmt))
1792 return NULL;
1793
1794 rhs_code = gimple_assign_rhs_code (last_stmt);
1795 switch (rhs_code)
1796 {
1797 case LROTATE_EXPR:
1798 case RROTATE_EXPR:
1799 break;
1800 default:
1801 return NULL;
1802 }
1803
1804 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1805 return NULL;
1806
1807 lhs = gimple_assign_lhs (last_stmt);
1808 oprnd0 = gimple_assign_rhs1 (last_stmt);
1809 type = TREE_TYPE (oprnd0);
1810 oprnd1 = gimple_assign_rhs2 (last_stmt);
1811 if (TREE_CODE (oprnd0) != SSA_NAME
1812 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1813 || !INTEGRAL_TYPE_P (type)
1814 || !TYPE_UNSIGNED (type))
1815 return NULL;
1816
81c40241 1817 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
7e9a3abb
JJ
1818 return NULL;
1819
1820 if (dt != vect_internal_def
1821 && dt != vect_constant_def
1822 && dt != vect_external_def)
1823 return NULL;
1824
1825 vectype = get_vectype_for_scalar_type (type);
1826 if (vectype == NULL_TREE)
1827 return NULL;
1828
1829 /* If vector/vector or vector/scalar rotate is supported by the target,
1830 don't do anything here. */
1831 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1832 if (optab1
1833 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1834 return NULL;
1835
310213d4 1836 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
7e9a3abb
JJ
1837 {
1838 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1839 if (optab2
1840 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1841 return NULL;
1842 }
1843
1844 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1845 don't do anything here either. */
1846 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1847 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1848 if (!optab1
1849 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1850 || !optab2
1851 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1852 {
310213d4 1853 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
7e9a3abb
JJ
1854 return NULL;
1855 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1856 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1857 if (!optab1
1858 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1859 || !optab2
1860 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1861 return NULL;
1862 }
1863
1864 *type_in = vectype;
1865 *type_out = vectype;
1866 if (*type_in == NULL_TREE)
1867 return NULL;
1868
68119618
JJ
1869 if (dt == vect_external_def
1870 && TREE_CODE (oprnd1) == SSA_NAME
310213d4 1871 && is_a <loop_vec_info> (vinfo))
68119618 1872 {
310213d4 1873 struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
68119618
JJ
1874 ext_def = loop_preheader_edge (loop);
1875 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1876 {
1877 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1878 if (bb == NULL
1879 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1880 ext_def = NULL;
1881 }
1882 }
1883
7e9a3abb
JJ
1884 def = NULL_TREE;
1885 if (TREE_CODE (oprnd1) == INTEGER_CST
1886 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1887 def = oprnd1;
1888 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1889 {
1890 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1891 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1892 && TYPE_PRECISION (TREE_TYPE (rhs1))
1893 == TYPE_PRECISION (type))
1894 def = rhs1;
1895 }
1896
1897 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1898 if (def == NULL_TREE)
1899 {
1900 def = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 1901 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
68119618
JJ
1902 if (ext_def)
1903 {
1904 basic_block new_bb
1905 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1906 gcc_assert (!new_bb);
1907 }
1908 else
1909 append_pattern_def_seq (stmt_vinfo, def_stmt);
7e9a3abb
JJ
1910 }
1911 stype = TREE_TYPE (def);
1912
1913 if (TREE_CODE (def) == INTEGER_CST)
1914 {
cc269bb6 1915 if (!tree_fits_uhwi_p (def)
7d362f6c 1916 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
7e9a3abb
JJ
1917 || integer_zerop (def))
1918 return NULL;
1919 def2 = build_int_cst (stype,
1920 GET_MODE_PRECISION (TYPE_MODE (type))
ae7e9ddd 1921 - tree_to_uhwi (def));
7e9a3abb
JJ
1922 }
1923 else
1924 {
1925 tree vecstype = get_vectype_for_scalar_type (stype);
1926 stmt_vec_info def_stmt_vinfo;
1927
1928 if (vecstype == NULL_TREE)
1929 return NULL;
1930 def2 = vect_recog_temp_ssa_var (stype, NULL);
0d0e4a03 1931 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
68119618
JJ
1932 if (ext_def)
1933 {
1934 basic_block new_bb
1935 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1936 gcc_assert (!new_bb);
1937 }
1938 else
1939 {
310213d4 1940 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
68119618
JJ
1941 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1942 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1943 append_pattern_def_seq (stmt_vinfo, def_stmt);
1944 }
7e9a3abb
JJ
1945
1946 def2 = vect_recog_temp_ssa_var (stype, NULL);
1947 tree mask
1948 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
0d0e4a03
JJ
1949 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
1950 gimple_assign_lhs (def_stmt), mask);
68119618
JJ
1951 if (ext_def)
1952 {
1953 basic_block new_bb
1954 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1955 gcc_assert (!new_bb);
1956 }
1957 else
1958 {
310213d4 1959 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
68119618
JJ
1960 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1961 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1962 append_pattern_def_seq (stmt_vinfo, def_stmt);
1963 }
7e9a3abb
JJ
1964 }
1965
1966 var1 = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
1967 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
1968 ? LSHIFT_EXPR : RSHIFT_EXPR,
1969 oprnd0, def);
7e9a3abb
JJ
1970 append_pattern_def_seq (stmt_vinfo, def_stmt);
1971
1972 var2 = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
1973 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
1974 ? RSHIFT_EXPR : LSHIFT_EXPR,
1975 oprnd0, def2);
7e9a3abb
JJ
1976 append_pattern_def_seq (stmt_vinfo, def_stmt);
1977
1978 /* Pattern detected. */
1979 if (dump_enabled_p ())
1980 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1981 "vect_recog_rotate_pattern: detected:\n");
7e9a3abb
JJ
1982
1983 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1984 var = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 1985 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
7e9a3abb
JJ
1986
1987 if (dump_enabled_p ())
1988 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
36ba4aae 1989
9771b263 1990 stmts->safe_push (last_stmt);
36ba4aae
IR
1991 return pattern_stmt;
1992}
1107f3ae 1993
732a0ad3
JJ
1994/* Detect a vector by vector shift pattern that wouldn't be otherwise
1995 vectorized:
1996
1997 type a_t;
1998 TYPE b_T, res_T;
1999
2000 S1 a_t = ;
2001 S2 b_T = ;
2002 S3 res_T = b_T op a_t;
2003
 2004 where type 'TYPE' is a type with a different size than 'type',
2005 and op is <<, >> or rotate.
2006
2007 Also detect cases:
2008
2009 type a_t;
2010 TYPE b_T, c_T, res_T;
2011
2012 S0 c_T = ;
2013 S1 a_t = (type) c_T;
2014 S2 b_T = ;
2015 S3 res_T = b_T op a_t;
2016
2017 Input/Output:
2018
2019 * STMTS: Contains a stmt from which the pattern search begins,
2020 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2021 with a shift/rotate which has same type on both operands, in the
2022 second case just b_T op c_T, in the first case with added cast
363477c0 2023 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
732a0ad3
JJ
2024
2025 Output:
2026
2027 * TYPE_IN: The type of the input arguments to the pattern.
2028
2029 * TYPE_OUT: The type of the output of this pattern.
2030
2031 * Return value: A new stmt that will be used to replace the shift/rotate
2032 S3 stmt. */
2033
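/* Illustrative example (added for exposition, not part of the original
   source): with 'TYPE' = unsigned long long and 'type' = int,

     unsigned long long b[N], res[N];
     int amnt[N];
     for (int i = 0; i < N; i++)
       res[i] = b[i] << amnt[i];

   matches S1-S3 above; the pattern casts the narrower shift amount to
   the type of b (via STMT_VINFO_PATTERN_DEF_SEQ) so that both vector
   operands of the emitted shift have the same mode.  */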
355fe088
TS
2034static gimple *
2035vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
732a0ad3
JJ
2036 tree *type_in, tree *type_out)
2037{
355fe088 2038 gimple *last_stmt = stmts->pop ();
732a0ad3 2039 tree oprnd0, oprnd1, lhs, var;
355fe088 2040 gimple *pattern_stmt, *def_stmt;
732a0ad3
JJ
2041 enum tree_code rhs_code;
2042 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
310213d4 2043 vec_info *vinfo = stmt_vinfo->vinfo;
732a0ad3 2044 enum vect_def_type dt;
732a0ad3
JJ
2045
2046 if (!is_gimple_assign (last_stmt))
2047 return NULL;
2048
2049 rhs_code = gimple_assign_rhs_code (last_stmt);
2050 switch (rhs_code)
2051 {
2052 case LSHIFT_EXPR:
2053 case RSHIFT_EXPR:
2054 case LROTATE_EXPR:
2055 case RROTATE_EXPR:
2056 break;
2057 default:
2058 return NULL;
2059 }
2060
2061 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2062 return NULL;
2063
2064 lhs = gimple_assign_lhs (last_stmt);
2065 oprnd0 = gimple_assign_rhs1 (last_stmt);
2066 oprnd1 = gimple_assign_rhs2 (last_stmt);
2067 if (TREE_CODE (oprnd0) != SSA_NAME
2068 || TREE_CODE (oprnd1) != SSA_NAME
2069 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2070 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2071 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2072 || TYPE_PRECISION (TREE_TYPE (lhs))
2073 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2074 return NULL;
2075
81c40241 2076 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
732a0ad3
JJ
2077 return NULL;
2078
2079 if (dt != vect_internal_def)
2080 return NULL;
2081
2082 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2083 *type_out = *type_in;
2084 if (*type_in == NULL_TREE)
2085 return NULL;
2086
81c40241 2087 tree def = NULL_TREE;
97ecdb46
JJ
2088 stmt_vec_info def_vinfo = vinfo_for_stmt (def_stmt);
2089 if (!STMT_VINFO_IN_PATTERN_P (def_vinfo) && gimple_assign_cast_p (def_stmt))
732a0ad3
JJ
2090 {
2091 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2092 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2093 && TYPE_PRECISION (TREE_TYPE (rhs1))
2094 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
0179520a
JJ
2095 {
2096 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2097 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2098 def = rhs1;
2099 else
2100 {
2101 tree mask
2102 = build_low_bits_mask (TREE_TYPE (rhs1),
2103 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2104 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2105 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2106 new_pattern_def_seq (stmt_vinfo, def_stmt);
2107 }
2108 }
732a0ad3
JJ
2109 }
2110
2111 if (def == NULL_TREE)
2112 {
2113 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
0d0e4a03 2114 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
083481d8 2115 new_pattern_def_seq (stmt_vinfo, def_stmt);
732a0ad3
JJ
2116 }
2117
2118 /* Pattern detected. */
73fbfcad 2119 if (dump_enabled_p ())
ccb3ad87 2120 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2121 "vect_recog_vector_vector_shift_pattern: detected:\n");
732a0ad3
JJ
2122
2123 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2124 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
0d0e4a03 2125 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
732a0ad3 2126
73fbfcad 2127 if (dump_enabled_p ())
78c60e3d 2128 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
732a0ad3 2129
9771b263 2130 stmts->safe_push (last_stmt);
732a0ad3
JJ
2131 return pattern_stmt;
2132}
2133
53109ba8
KT
2134/* Return true iff the target has a vector optab implementing the operation
2135 CODE on type VECTYPE. */
47486460 2136
53109ba8
KT
2137static bool
2138target_has_vecop_for_code (tree_code code, tree vectype)
2139{
2140 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2141 return voptab
2142 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2143}
47486460 2144
53109ba8
KT
2145/* Verify that the target has optabs of VECTYPE to perform all the steps
2146 needed by the multiplication-by-immediate synthesis algorithm described by
2147 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2148 present. Return true iff the target supports all the steps. */
2149
2150static bool
2151target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2152 tree vectype, bool synth_shift_p)
2153{
2154 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2155 return false;
2156
2157 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2158 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2159
2160 if (var == negate_variant
2161 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2162 return false;
2163
2164 /* If we must synthesize shifts with additions make sure that vector
2165 addition is available. */
2166 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2167 return false;
2168
2169 for (int i = 1; i < alg->ops; i++)
2170 {
2171 switch (alg->op[i])
2172 {
2173 case alg_shift:
2174 break;
2175 case alg_add_t_m2:
2176 case alg_add_t2_m:
2177 case alg_add_factor:
2178 if (!supports_vplus)
2179 return false;
2180 break;
2181 case alg_sub_t_m2:
2182 case alg_sub_t2_m:
2183 case alg_sub_factor:
2184 if (!supports_vminus)
2185 return false;
2186 break;
2187 case alg_unknown:
2188 case alg_m:
2189 case alg_zero:
2190 case alg_impossible:
2191 return false;
2192 default:
2193 gcc_unreachable ();
2194 }
2195 }
2196
2197 return true;
2198}
2199
2200/* Synthesize a left shift of OP by AMNT bits using a series of additions and
2201 putting the final result in DEST. Append all statements but the last into
2202 VINFO. Return the last statement. */
2203
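/* For example (illustration only), a shift by AMNT = 3 becomes the
   three self-additions

     t1 = op + op;        op * 2
     t2 = t1 + t1;        op * 4
     dest = t2 + t2;      op * 8

   i.e. AMNT statements, each doubling the running value.  */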
2204static gimple *
2205synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2206 stmt_vec_info vinfo)
2207{
2208 HOST_WIDE_INT i;
2209 tree itype = TREE_TYPE (op);
2210 tree prev_res = op;
2211 gcc_assert (amnt >= 0);
2212 for (i = 0; i < amnt; i++)
2213 {
2214 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2215 : dest;
2216 gimple *stmt
2217 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2218 prev_res = tmp_var;
2219 if (i < amnt - 1)
2220 append_pattern_def_seq (vinfo, stmt);
2221 else
2222 return stmt;
2223 }
2224 gcc_unreachable ();
2225 return NULL;
2226}
2227
2228/* Helper for vect_synth_mult_by_constant. Apply a binary operation
2229 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2230 the process if necessary. Append the resulting assignment statements
2231 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2232 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2233 left shifts using additions. */
2234
2235static tree
2236apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2237 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2238{
2239 if (integer_zerop (op2)
2240 && (code == LSHIFT_EXPR
2241 || code == PLUS_EXPR))
2242 {
2243 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2244 return op1;
2245 }
2246
2247 gimple *stmt;
2248 tree itype = TREE_TYPE (op1);
2249 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2250
2251 if (code == LSHIFT_EXPR
2252 && synth_shift_p)
2253 {
2254 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2255 stmt_vinfo);
2256 append_pattern_def_seq (stmt_vinfo, stmt);
2257 return tmp_var;
2258 }
2259
2260 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2261 append_pattern_def_seq (stmt_vinfo, stmt);
2262 return tmp_var;
2263}
2264
2265/* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2266 and simple arithmetic operations to be vectorized. Record the statements
2267 produced in STMT_VINFO and return the last statement in the sequence or
2268 NULL if it's not possible to synthesize such a multiplication.
2269 This function mirrors the behavior of expand_mult_const in expmed.c but
2270 works on tree-ssa form. */
2271
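/* Illustration (added for exposition): for VAL = 5, choose_mult_variant
   typically selects a shift-and-add sequence such as

     t1 = op << 2;        op * 4
     res = t1 + op;       op * 5

   and if the target lacks vector shifts the t1 step is itself
   synthesized with additions (see synth_lshift_by_additions).  */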
2272static gimple *
2273vect_synth_mult_by_constant (tree op, tree val,
2274 stmt_vec_info stmt_vinfo)
2275{
2276 tree itype = TREE_TYPE (op);
2277 machine_mode mode = TYPE_MODE (itype);
2278 struct algorithm alg;
2279 mult_variant variant;
2280 if (!tree_fits_shwi_p (val))
2281 return NULL;
2282
2283 /* Multiplication synthesis by shifts, adds and subs can introduce
2284 signed overflow where the original operation didn't. Perform the
2285 operations on an unsigned type and cast back to avoid this.
2286 In the future we may want to relax this for synthesis algorithms
2287 that we can prove do not cause unexpected overflow. */
2288 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2289
2290 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
47486460 2291
53109ba8
KT
2292 /* Targets that don't support vector shifts but support vector additions
2293 can synthesize shifts that way. */
2294 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2295
2296 HOST_WIDE_INT hwval = tree_to_shwi (val);
2297 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2298 The vectorizer's benefit analysis will decide whether it's beneficial
2299 to do this. */
2300 bool possible = choose_mult_variant (mode, hwval, &alg,
2301 &variant, MAX_COST);
2302 if (!possible)
2303 return NULL;
2304
2305 tree vectype = get_vectype_for_scalar_type (multtype);
2306
2307 if (!vectype
2308 || !target_supports_mult_synth_alg (&alg, variant,
2309 vectype, synth_shift_p))
2310 return NULL;
2311
2312 tree accumulator;
2313
2314 /* Clear out the sequence of statements so we can populate it below. */
2315 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2316 gimple *stmt = NULL;
2317
2318 if (cast_to_unsigned_p)
2319 {
2320 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2321 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2322 append_pattern_def_seq (stmt_vinfo, stmt);
2323 op = tmp_op;
2324 }
2325
2326 if (alg.op[0] == alg_zero)
2327 accumulator = build_int_cst (multtype, 0);
2328 else
2329 accumulator = op;
2330
2331 bool needs_fixup = (variant == negate_variant)
2332 || (variant == add_variant);
2333
2334 for (int i = 1; i < alg.ops; i++)
2335 {
2336 tree shft_log = build_int_cst (multtype, alg.log[i]);
2337 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2338 tree tmp_var = NULL_TREE;
2339
2340 switch (alg.op[i])
2341 {
2342 case alg_shift:
2343 if (synth_shift_p)
2344 stmt
2345 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2346 stmt_vinfo);
2347 else
2348 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2349 shft_log);
2350 break;
2351 case alg_add_t_m2:
2352 tmp_var
2353 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2354 stmt_vinfo, synth_shift_p);
2355 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2356 tmp_var);
2357 break;
2358 case alg_sub_t_m2:
2359 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2360 shft_log, stmt_vinfo,
2361 synth_shift_p);
2362 /* In some algorithms the first step involves zeroing the
2363 accumulator. If subtracting from such an accumulator
2364 just emit the negation directly. */
2365 if (integer_zerop (accumulator))
2366 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2367 else
2368 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2369 tmp_var);
2370 break;
2371 case alg_add_t2_m:
2372 tmp_var
2373 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2374 stmt_vinfo, synth_shift_p);
2375 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2376 break;
2377 case alg_sub_t2_m:
2378 tmp_var
2379 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2380 stmt_vinfo, synth_shift_p);
2381 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2382 break;
2383 case alg_add_factor:
2384 tmp_var
2385 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2386 stmt_vinfo, synth_shift_p);
2387 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2388 tmp_var);
2389 break;
2390 case alg_sub_factor:
2391 tmp_var
2392 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2393 stmt_vinfo, synth_shift_p);
2394 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2395 accumulator);
2396 break;
2397 default:
2398 gcc_unreachable ();
2399 }
2400 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2401 but rather return it directly. */
2402
2403 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2404 append_pattern_def_seq (stmt_vinfo, stmt);
2405 accumulator = accum_tmp;
2406 }
2407 if (variant == negate_variant)
2408 {
2409 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2410 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2411 accumulator = accum_tmp;
2412 if (cast_to_unsigned_p)
2413 append_pattern_def_seq (stmt_vinfo, stmt);
2414 }
2415 else if (variant == add_variant)
2416 {
2417 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2418 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2419 accumulator = accum_tmp;
2420 if (cast_to_unsigned_p)
2421 append_pattern_def_seq (stmt_vinfo, stmt);
2422 }
 2423 /* Move back to a signed type if needed. */
2424 if (cast_to_unsigned_p)
2425 {
2426 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2427 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2428 }
2429
2430 return stmt;
2431}
2432
 2433/* Detect multiplication by a constant and convert it into a sequence of
 2434 shifts, additions, subtractions and negations. We reuse the
 2435 choose_mult_variant algorithms from expmed.c.
47486460
VK
2436
2437 Input/Output:
2438
2439 STMTS: Contains a stmt from which the pattern search begins,
53109ba8 2440 i.e. the mult stmt.
47486460
VK
2441
2442 Output:
2443
2444 * TYPE_IN: The type of the input arguments to the pattern.
2445
2446 * TYPE_OUT: The type of the output of this pattern.
2447
53109ba8
KT
2448 * Return value: A new stmt that will be used to replace
2449 the multiplication. */
47486460 2450
355fe088
TS
2451static gimple *
2452vect_recog_mult_pattern (vec<gimple *> *stmts,
47486460
VK
2453 tree *type_in, tree *type_out)
2454{
355fe088 2455 gimple *last_stmt = stmts->pop ();
47486460 2456 tree oprnd0, oprnd1, vectype, itype;
53109ba8 2457 gimple *pattern_stmt;
47486460 2458 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
47486460
VK
2459
2460 if (!is_gimple_assign (last_stmt))
2461 return NULL;
2462
2463 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2464 return NULL;
2465
2466 oprnd0 = gimple_assign_rhs1 (last_stmt);
2467 oprnd1 = gimple_assign_rhs2 (last_stmt);
2468 itype = TREE_TYPE (oprnd0);
2469
2470 if (TREE_CODE (oprnd0) != SSA_NAME
2471 || TREE_CODE (oprnd1) != INTEGER_CST
2472 || !INTEGRAL_TYPE_P (itype)
2473 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2474 return NULL;
2475
2476 vectype = get_vectype_for_scalar_type (itype);
2477 if (vectype == NULL_TREE)
2478 return NULL;
2479
2480 /* If the target can handle vectorized multiplication natively,
2481 don't attempt to optimize this. */
53109ba8
KT
2482 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2483 if (mul_optab != unknown_optab)
47486460
VK
2484 {
2485 machine_mode vec_mode = TYPE_MODE (vectype);
53109ba8 2486 int icode = (int) optab_handler (mul_optab, vec_mode);
47486460 2487 if (icode != CODE_FOR_nothing)
53109ba8 2488 return NULL;
47486460
VK
2489 }
2490
53109ba8
KT
2491 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2492 if (!pattern_stmt)
47486460
VK
2493 return NULL;
2494
2495 /* Pattern detected. */
2496 if (dump_enabled_p ())
2497 dump_printf_loc (MSG_NOTE, vect_location,
2498 "vect_recog_mult_pattern: detected:\n");
2499
2500 if (dump_enabled_p ())
2501 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
 2502 pattern_stmt, 0);
2503
2504 stmts->safe_push (last_stmt);
2505 *type_in = vectype;
2506 *type_out = vectype;
2507
2508 return pattern_stmt;
2509}
2510
079c527f 2511/* Detect a signed division by a constant that wouldn't be
363477c0
JJ
2512 otherwise vectorized:
2513
2514 type a_t, b_t;
2515
2516 S1 a_t = b_t / N;
2517
079c527f 2518 where type 'type' is an integral type and N is a constant.
363477c0 2519
079c527f 2520 Similarly handle modulo by a constant:
363477c0
JJ
2521
2522 S4 a_t = b_t % N;
2523
2524 Input/Output:
2525
2526 * STMTS: Contains a stmt from which the pattern search begins,
079c527f
JJ
 2527 i.e. the division stmt. S1 is replaced, if N is a power of
 2528 two constant and the type is signed, by:
363477c0
JJ
2529 S3 y_t = b_t < 0 ? N - 1 : 0;
2530 S2 x_t = b_t + y_t;
2531 S1' a_t = x_t >> log2 (N);
2532
079c527f
JJ
 2533 S4 is replaced, if N is a power of two constant and the
 2534 type is signed, by (where *_T temporaries have unsigned type):
363477c0
JJ
2535 S9 y_T = b_t < 0 ? -1U : 0U;
2536 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2537 S7 z_t = (type) z_T;
2538 S6 w_t = b_t + z_t;
2539 S5 x_t = w_t & (N - 1);
2540 S4' a_t = x_t - z_t;
2541
2542 Output:
2543
2544 * TYPE_IN: The type of the input arguments to the pattern.
2545
2546 * TYPE_OUT: The type of the output of this pattern.
2547
2548 * Return value: A new stmt that will be used to replace the division
2549 S1 or modulo S4 stmt. */
2550
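/* Illustrative example (added for exposition, not part of the original
   source): for

     int a = b / 4;

   the replacement sequence S3/S2/S1' is

     y = b < 0 ? 3 : 0;
     x = b + y;
     a = x >> 2;

   Adding N - 1 before the arithmetic shift makes the result round
   toward zero for negative b, matching TRUNC_DIV_EXPR semantics
   (e.g. b = -5 gives x = -2 and a = -1).  */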
355fe088
TS
2551static gimple *
2552vect_recog_divmod_pattern (vec<gimple *> *stmts,
079c527f 2553 tree *type_in, tree *type_out)
363477c0 2554{
355fe088 2555 gimple *last_stmt = stmts->pop ();
5deb57cb 2556 tree oprnd0, oprnd1, vectype, itype, cond;
355fe088 2557 gimple *pattern_stmt, *def_stmt;
363477c0
JJ
2558 enum tree_code rhs_code;
2559 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
310213d4 2560 vec_info *vinfo = stmt_vinfo->vinfo;
363477c0 2561 optab optab;
00f07b86 2562 tree q;
079c527f 2563 int dummy_int, prec;
079c527f 2564 stmt_vec_info def_stmt_vinfo;
363477c0
JJ
2565
2566 if (!is_gimple_assign (last_stmt))
2567 return NULL;
2568
2569 rhs_code = gimple_assign_rhs_code (last_stmt);
2570 switch (rhs_code)
2571 {
2572 case TRUNC_DIV_EXPR:
2573 case TRUNC_MOD_EXPR:
2574 break;
2575 default:
2576 return NULL;
2577 }
2578
2579 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2580 return NULL;
2581
2582 oprnd0 = gimple_assign_rhs1 (last_stmt);
2583 oprnd1 = gimple_assign_rhs2 (last_stmt);
2584 itype = TREE_TYPE (oprnd0);
2585 if (TREE_CODE (oprnd0) != SSA_NAME
2586 || TREE_CODE (oprnd1) != INTEGER_CST
2587 || TREE_CODE (itype) != INTEGER_TYPE
079c527f 2588 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
363477c0
JJ
2589 return NULL;
2590
2591 vectype = get_vectype_for_scalar_type (itype);
2592 if (vectype == NULL_TREE)
2593 return NULL;
2594
2595 /* If the target can handle vectorized division or modulo natively,
2596 don't attempt to optimize this. */
2597 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2225b9f2 2598 if (optab != unknown_optab)
363477c0 2599 {
ef4bddc2 2600 machine_mode vec_mode = TYPE_MODE (vectype);
363477c0 2601 int icode = (int) optab_handler (optab, vec_mode);
e6d4f8f5 2602 if (icode != CODE_FOR_nothing)
363477c0
JJ
2603 return NULL;
2604 }
2605
079c527f
JJ
2606 prec = TYPE_PRECISION (itype);
2607 if (integer_pow2p (oprnd1))
363477c0 2608 {
079c527f
JJ
2609 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2610 return NULL;
363477c0 2611
079c527f 2612 /* Pattern detected. */
73fbfcad 2613 if (dump_enabled_p ())
ccb3ad87 2614 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2615 "vect_recog_divmod_pattern: detected:\n");
079c527f
JJ
2616
2617 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2618 build_int_cst (itype, 0));
2619 if (rhs_code == TRUNC_DIV_EXPR)
2620 {
2621 tree var = vect_recog_temp_ssa_var (itype, NULL);
2622 tree shift;
2623 def_stmt
0d0e4a03
JJ
2624 = gimple_build_assign (var, COND_EXPR, cond,
2625 fold_build2 (MINUS_EXPR, itype, oprnd1,
2626 build_int_cst (itype, 1)),
2627 build_int_cst (itype, 0));
079c527f
JJ
2628 new_pattern_def_seq (stmt_vinfo, def_stmt);
2629 var = vect_recog_temp_ssa_var (itype, NULL);
2630 def_stmt
0d0e4a03
JJ
2631 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2632 gimple_assign_lhs (def_stmt));
079c527f
JJ
2633 append_pattern_def_seq (stmt_vinfo, def_stmt);
2634
2635 shift = build_int_cst (itype, tree_log2 (oprnd1));
2636 pattern_stmt
0d0e4a03
JJ
2637 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2638 RSHIFT_EXPR, var, shift);
079c527f
JJ
2639 }
2640 else
2641 {
2642 tree signmask;
2643 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2644 if (compare_tree_int (oprnd1, 2) == 0)
2645 {
2646 signmask = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2647 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2648 build_int_cst (itype, 1),
2649 build_int_cst (itype, 0));
079c527f
JJ
2650 append_pattern_def_seq (stmt_vinfo, def_stmt);
2651 }
2652 else
2653 {
2654 tree utype
2655 = build_nonstandard_integer_type (prec, 1);
2656 tree vecutype = get_vectype_for_scalar_type (utype);
2657 tree shift
2658 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2659 - tree_log2 (oprnd1));
2660 tree var = vect_recog_temp_ssa_var (utype, NULL);
2661
0d0e4a03
JJ
2662 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2663 build_int_cst (utype, -1),
2664 build_int_cst (utype, 0));
310213d4 2665 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
079c527f
JJ
2666 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2667 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2668 append_pattern_def_seq (stmt_vinfo, def_stmt);
2669 var = vect_recog_temp_ssa_var (utype, NULL);
0d0e4a03
JJ
2670 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2671 gimple_assign_lhs (def_stmt),
2672 shift);
310213d4 2673 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
079c527f
JJ
2674 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2675 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2676 append_pattern_def_seq (stmt_vinfo, def_stmt);
2677 signmask = vect_recog_temp_ssa_var (itype, NULL);
2678 def_stmt
0d0e4a03 2679 = gimple_build_assign (signmask, NOP_EXPR, var);
079c527f
JJ
2680 append_pattern_def_seq (stmt_vinfo, def_stmt);
2681 }
2682 def_stmt
0d0e4a03
JJ
2683 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2684 PLUS_EXPR, oprnd0, signmask);
079c527f
JJ
2685 append_pattern_def_seq (stmt_vinfo, def_stmt);
2686 def_stmt
0d0e4a03
JJ
2687 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2688 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2689 fold_build2 (MINUS_EXPR, itype, oprnd1,
2690 build_int_cst (itype, 1)));
079c527f
JJ
2691 append_pattern_def_seq (stmt_vinfo, def_stmt);
2692
2693 pattern_stmt
0d0e4a03
JJ
2694 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2695 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2696 signmask);
079c527f
JJ
2697 }
2698
73fbfcad 2699 if (dump_enabled_p ())
78c60e3d
SS
2700 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2701 0);
079c527f 2702
9771b263 2703 stmts->safe_push (last_stmt);
079c527f
JJ
2704
2705 *type_in = vectype;
2706 *type_out = vectype;
2707 return pattern_stmt;
363477c0 2708 }
079c527f 2709
6b58915b
RS
2710 if (prec > HOST_BITS_PER_WIDE_INT
2711 || integer_zerop (oprnd1))
079c527f
JJ
2712 return NULL;
2713
00f07b86
RH
2714 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2715 return NULL;
079c527f
JJ
2716
2717 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2718
2719 if (TYPE_UNSIGNED (itype))
363477c0 2720 {
079c527f
JJ
2721 unsigned HOST_WIDE_INT mh, ml;
2722 int pre_shift, post_shift;
6b58915b
RS
2723 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2724 & GET_MODE_MASK (TYPE_MODE (itype)));
5deb57cb 2725 tree t1, t2, t3, t4;
079c527f 2726
fecfbfa4 2727 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
079c527f
JJ
2728 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2729 return NULL;
2730
2731 /* Find a suitable multiplier and right shift count
2732 instead of multiplying with D. */
2733 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2734
2735 /* If the suggested multiplier is more than SIZE bits, we can do better
2736 for even divisors, using an initial right shift. */
2737 if (mh != 0 && (d & 1) == 0)
363477c0 2738 {
146ec50f 2739 pre_shift = ctz_or_zero (d);
079c527f
JJ
2740 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2741 &ml, &post_shift, &dummy_int);
2742 gcc_assert (!mh);
2743 }
2744 else
2745 pre_shift = 0;
2746
2747 if (mh != 0)
2748 {
2749 if (post_shift - 1 >= prec)
2750 return NULL;
2751
5deb57cb
JJ
2752 /* t1 = oprnd0 h* ml;
2753 t2 = oprnd0 - t1;
2754 t3 = t2 >> 1;
2755 t4 = t1 + t3;
2756 q = t4 >> (post_shift - 1); */
2757 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2758 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2759 build_int_cst (itype, ml));
079c527f 2760 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f 2761
5deb57cb 2762 t2 = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2763 def_stmt
0d0e4a03 2764 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
083481d8 2765 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f
JJ
2766
2767 t3 = vect_recog_temp_ssa_var (itype, NULL);
2768 def_stmt
0d0e4a03 2769 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
079c527f
JJ
2770 append_pattern_def_seq (stmt_vinfo, def_stmt);
2771
5deb57cb 2772 t4 = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2773 def_stmt
0d0e4a03 2774 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
079c527f
JJ
2775
2776 if (post_shift != 1)
2777 {
2778 append_pattern_def_seq (stmt_vinfo, def_stmt);
2779
5deb57cb 2780 q = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2781 pattern_stmt
0d0e4a03
JJ
2782 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2783 build_int_cst (itype, post_shift - 1));
079c527f
JJ
2784 }
2785 else
2786 {
5deb57cb 2787 q = t4;
079c527f
JJ
2788 pattern_stmt = def_stmt;
2789 }
363477c0
JJ
2790 }
2791 else
2792 {
079c527f
JJ
2793 if (pre_shift >= prec || post_shift >= prec)
2794 return NULL;
2795
2796 /* t1 = oprnd0 >> pre_shift;
5deb57cb
JJ
2797 t2 = t1 h* ml;
2798 q = t2 >> post_shift; */
079c527f
JJ
2799 if (pre_shift)
2800 {
2801 t1 = vect_recog_temp_ssa_var (itype, NULL);
2802 def_stmt
0d0e4a03
JJ
2803 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2804 build_int_cst (NULL, pre_shift));
079c527f
JJ
2805 append_pattern_def_seq (stmt_vinfo, def_stmt);
2806 }
2807 else
2808 t1 = oprnd0;
363477c0 2809
5deb57cb 2810 t2 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2811 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2812 build_int_cst (itype, ml));
079c527f 2813
5deb57cb
JJ
2814 if (post_shift)
2815 {
2816 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f 2817
5deb57cb
JJ
2818 q = vect_recog_temp_ssa_var (itype, NULL);
2819 def_stmt
0d0e4a03
JJ
2820 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2821 build_int_cst (itype, post_shift));
5deb57cb
JJ
2822 }
2823 else
2824 q = t2;
2825
2826 pattern_stmt = def_stmt;
079c527f
JJ
2827 }
2828 }
2829 else
2830 {
2831 unsigned HOST_WIDE_INT ml;
4ee4c52c 2832 int post_shift;
6b58915b 2833 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
079c527f
JJ
2834 unsigned HOST_WIDE_INT abs_d;
2835 bool add = false;
5deb57cb 2836 tree t1, t2, t3, t4;
079c527f
JJ
2837
2838 /* Give up for -1. */
2839 if (d == -1)
2840 return NULL;
2841
079c527f
JJ
2842 /* Since d might be INT_MIN, we have to cast to
2843 unsigned HOST_WIDE_INT before negating to avoid
2844 undefined signed overflow. */
2845 abs_d = (d >= 0
2846 ? (unsigned HOST_WIDE_INT) d
2847 : - (unsigned HOST_WIDE_INT) d);
2848
2849 /* n rem d = n rem -d */
2850 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2851 {
2852 d = abs_d;
2853 oprnd1 = build_int_cst (itype, abs_d);
2854 }
2855 else if (HOST_BITS_PER_WIDE_INT >= prec
fecfbfa4 2856 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
079c527f
JJ
2857 /* This case is not handled correctly below. */
2858 return NULL;
2859
4ee4c52c 2860 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
fecfbfa4 2861 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
079c527f
JJ
2862 {
2863 add = true;
dd4786fe 2864 ml |= HOST_WIDE_INT_M1U << (prec - 1);
079c527f
JJ
2865 }
2866 if (post_shift >= prec)
2867 return NULL;
2868
7abed779 2869 /* t1 = oprnd0 h* ml; */
5deb57cb 2870 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2871 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2872 build_int_cst (itype, ml));
079c527f
JJ
2873
2874 if (add)
2875 {
5deb57cb 2876 /* t2 = t1 + oprnd0; */
7abed779 2877 append_pattern_def_seq (stmt_vinfo, def_stmt);
5deb57cb 2878 t2 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 2879 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
079c527f
JJ
2880 }
2881 else
5deb57cb 2882 t2 = t1;
079c527f 2883
5deb57cb 2884 if (post_shift)
079c527f 2885 {
5deb57cb 2886 /* t3 = t2 >> post_shift; */
7abed779 2887 append_pattern_def_seq (stmt_vinfo, def_stmt);
5deb57cb 2888 t3 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2889 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2890 build_int_cst (itype, post_shift));
363477c0 2891 }
079c527f 2892 else
5deb57cb 2893 t3 = t2;
079c527f 2894
807e902e 2895 wide_int oprnd0_min, oprnd0_max;
7abed779
JJ
2896 int msb = 1;
2897 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2898 {
807e902e 2899 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
7abed779 2900 msb = 0;
807e902e 2901 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
7abed779
JJ
2902 msb = -1;
2903 }
079c527f 2904
7abed779
JJ
2905 if (msb == 0 && d >= 0)
2906 {
2907 /* q = t3; */
2908 q = t3;
2909 pattern_stmt = def_stmt;
2910 }
2911 else
2912 {
2913 /* t4 = oprnd0 >> (prec - 1);
2914 or if we know from VRP that oprnd0 >= 0
2915 t4 = 0;
2916 or if we know from VRP that oprnd0 < 0
2917 t4 = -1; */
2918 append_pattern_def_seq (stmt_vinfo, def_stmt);
2919 t4 = vect_recog_temp_ssa_var (itype, NULL);
2920 if (msb != 1)
0d0e4a03
JJ
2921 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2922 build_int_cst (itype, msb));
7abed779 2923 else
0d0e4a03
JJ
2924 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2925 build_int_cst (itype, prec - 1));
7abed779
JJ
2926 append_pattern_def_seq (stmt_vinfo, def_stmt);
2927
2928 /* q = t3 - t4; or q = t4 - t3; */
2929 q = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2930 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2931 d < 0 ? t3 : t4);
7abed779 2932 }
079c527f
JJ
2933 }
2934
2935 if (rhs_code == TRUNC_MOD_EXPR)
2936 {
2937 tree r, t1;
2938
2939 /* We divided. Now finish by:
2940 t1 = q * oprnd1;
2941 r = oprnd0 - t1; */
2942 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2943
2944 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 2945 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
083481d8 2946 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0 2947
079c527f 2948 r = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 2949 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
363477c0
JJ
2950 }
2951
079c527f 2952 /* Pattern detected. */
73fbfcad 2953 if (dump_enabled_p ())
78c60e3d 2954 {
ccb3ad87 2955 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 2956 "vect_recog_divmod_pattern: detected: ");
ccb3ad87 2957 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
78c60e3d 2958 }
363477c0 2959
9771b263 2960 stmts->safe_push (last_stmt);
363477c0
JJ
2961
2962 *type_in = vectype;
2963 *type_out = vectype;
2964 return pattern_stmt;
2965}
2966
69d2aade
JJ
2967/* Function vect_recog_mixed_size_cond_pattern
2968
2969 Try to find the following pattern:
2970
2971 type x_t, y_t;
2972 TYPE a_T, b_T, c_T;
2973 loop:
2974 S1 a_T = x_t CMP y_t ? b_T : c_T;
2975
 2976 where type 'TYPE' is an integral type which has a different size
bc4fb355 2977 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
69d2aade 2978 than 'type', the constants need to fit into an integer type
bc4fb355 2979 with the same width as 'type') or results of conversion from 'type'.
69d2aade
JJ
2980
2981 Input:
2982
2983 * LAST_STMT: A stmt from which the pattern search begins.
2984
2985 Output:
2986
2987 * TYPE_IN: The type of the input arguments to the pattern.
2988
2989 * TYPE_OUT: The type of the output of this pattern.
2990
2991 * Return value: A new stmt that will be used to replace the pattern.
2992 Additionally a def_stmt is added.
2993
2994 a_it = x_t CMP y_t ? b_it : c_it;
2995 a_T = (TYPE) a_it; */
2996
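/* Illustrative example (added for exposition): with 'type' = int and
   'TYPE' = long long,

     long long a[N];
     int x[N], y[N];
     for (int i = 0; i < N; i++)
       a[i] = x[i] < y[i] ? 3 : 0;

   assuming the target cannot expand the mixed-size VEC_COND_EXPR
   directly, the selection is done in an int-sized temporary and then
   widened, so comparison and selection use a single vector width.  */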
355fe088
TS
2997static gimple *
2998vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
69d2aade
JJ
2999 tree *type_out)
3000{
355fe088 3001 gimple *last_stmt = (*stmts)[0];
69d2aade
JJ
3002 tree cond_expr, then_clause, else_clause;
3003 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
bc4fb355 3004 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
355fe088 3005 gimple *pattern_stmt, *def_stmt;
310213d4 3006 vec_info *vinfo = stmt_vinfo->vinfo;
bc4fb355 3007 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
355fe088 3008 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
bc4fb355
IR
3009 bool promotion;
3010 tree comp_scalar_type;
69d2aade
JJ
3011
3012 if (!is_gimple_assign (last_stmt)
3013 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3014 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3015 return NULL;
3016
3017 cond_expr = gimple_assign_rhs1 (last_stmt);
3018 then_clause = gimple_assign_rhs2 (last_stmt);
3019 else_clause = gimple_assign_rhs3 (last_stmt);
3020
87aab9b2
JJ
3021 if (!COMPARISON_CLASS_P (cond_expr))
3022 return NULL;
3023
bc4fb355
IR
3024 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3025 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
87aab9b2 3026 if (comp_vectype == NULL_TREE)
69d2aade
JJ
3027 return NULL;
3028
3029 type = gimple_expr_type (last_stmt);
bc4fb355
IR
3030 if (types_compatible_p (type, comp_scalar_type)
3031 || ((TREE_CODE (then_clause) != INTEGER_CST
3032 || TREE_CODE (else_clause) != INTEGER_CST)
3033 && !INTEGRAL_TYPE_P (comp_scalar_type))
3034 || !INTEGRAL_TYPE_P (type))
3035 return NULL;
3036
3037 if ((TREE_CODE (then_clause) != INTEGER_CST
3038 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
3039 &def_stmt0, &promotion))
3040 || (TREE_CODE (else_clause) != INTEGER_CST
3041 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
3042 &def_stmt1, &promotion)))
3043 return NULL;
3044
3045 if (orig_type0 && orig_type1
3046 && !types_compatible_p (orig_type0, orig_type1))
3047 return NULL;
3048
3049 if (orig_type0)
3050 {
3051 if (!types_compatible_p (orig_type0, comp_scalar_type))
3052 return NULL;
3053 then_clause = gimple_assign_rhs1 (def_stmt0);
3054 itype = orig_type0;
3055 }
3056
3057 if (orig_type1)
3058 {
3059 if (!types_compatible_p (orig_type1, comp_scalar_type))
3060 return NULL;
3061 else_clause = gimple_assign_rhs1 (def_stmt1);
3062 itype = orig_type1;
3063 }
3064
69d2aade 3065
6c825cd4
DS
3066 HOST_WIDE_INT cmp_mode_size
3067 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3068
3069 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == cmp_mode_size)
69d2aade
JJ
3070 return NULL;
3071
3072 vectype = get_vectype_for_scalar_type (type);
3073 if (vectype == NULL_TREE)
3074 return NULL;
3075
96592eed 3076 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
69d2aade
JJ
3077 return NULL;
3078
bc4fb355 3079 if (itype == NULL_TREE)
6c825cd4 3080 itype = build_nonstandard_integer_type (cmp_mode_size,
bc4fb355
IR
3081 TYPE_UNSIGNED (type));
3082
69d2aade 3083 if (itype == NULL_TREE
6c825cd4 3084 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != cmp_mode_size)
69d2aade
JJ
3085 return NULL;
3086
3087 vecitype = get_vectype_for_scalar_type (itype);
3088 if (vecitype == NULL_TREE)
3089 return NULL;
3090
96592eed 3091 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
69d2aade
JJ
3092 return NULL;
3093
6c825cd4 3094 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > cmp_mode_size)
69d2aade 3095 {
bc4fb355
IR
3096 if ((TREE_CODE (then_clause) == INTEGER_CST
3097 && !int_fits_type_p (then_clause, itype))
3098 || (TREE_CODE (else_clause) == INTEGER_CST
3099 && !int_fits_type_p (else_clause, itype)))
69d2aade
JJ
3100 return NULL;
3101 }
3102
0d0e4a03
JJ
3103 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3104 COND_EXPR, unshare_expr (cond_expr),
3105 fold_convert (itype, then_clause),
3106 fold_convert (itype, else_clause));
3107 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3108 NOP_EXPR, gimple_assign_lhs (def_stmt));
69d2aade 3109
083481d8 3110 new_pattern_def_seq (stmt_vinfo, def_stmt);
310213d4 3111 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
69d2aade
JJ
3112 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3113 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
3114 *type_in = vecitype;
3115 *type_out = vectype;
3116
73fbfcad 3117 if (dump_enabled_p ())
ccb3ad87 3118 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3119 "vect_recog_mixed_size_cond_pattern: detected:\n");
f5709183 3120
69d2aade
JJ
3121 return pattern_stmt;
3122}
3123
3124
71c92d17 3125/* Helper function of vect_recog_bool_pattern. Called recursively, return
42fd8198
IE
3126 true if bool VAR can and should be optimized that way. Assume it shouldn't
3127 in case it's a result of a comparison which can be directly vectorized into
fa2c9034
RB
3128 a vector comparison. Fills in STMTS with all stmts visited during the
3129 walk. */
71c92d17
JJ
3130
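/* Illustrative example (added for exposition, names hypothetical): in

     _Bool b;
     int x, y, out;
     ...
     out = (x < y) & b;

   the bool-valued defs feeding the wider use are walked here; if every
   visited stmt qualifies, adjust_bool_pattern below rewrites them into
   integer COND_EXPRs.  */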
3131static bool
fa2c9034 3132check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
71c92d17 3133{
355fe088 3134 gimple *def_stmt;
71c92d17 3135 enum vect_def_type dt;
81c40241 3136 tree rhs1;
71c92d17
JJ
3137 enum tree_code rhs_code;
3138
81c40241 3139 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
71c92d17
JJ
3140 return false;
3141
3142 if (dt != vect_internal_def)
3143 return false;
3144
3145 if (!is_gimple_assign (def_stmt))
3146 return false;
3147
fa2c9034
RB
3148 if (stmts.contains (def_stmt))
3149 return true;
71c92d17
JJ
3150
3151 rhs1 = gimple_assign_rhs1 (def_stmt);
3152 rhs_code = gimple_assign_rhs_code (def_stmt);
3153 switch (rhs_code)
3154 {
3155 case SSA_NAME:
fa2c9034
RB
3156 if (! check_bool_pattern (rhs1, vinfo, stmts))
3157 return false;
3158 break;
71c92d17
JJ
3159
3160 CASE_CONVERT:
3161 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
3162 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3163 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
3164 return false;
fa2c9034
RB
3165 if (! check_bool_pattern (rhs1, vinfo, stmts))
3166 return false;
3167 break;
71c92d17
JJ
3168
3169 case BIT_NOT_EXPR:
fa2c9034
RB
3170 if (! check_bool_pattern (rhs1, vinfo, stmts))
3171 return false;
3172 break;
71c92d17
JJ
3173
3174 case BIT_AND_EXPR:
3175 case BIT_IOR_EXPR:
3176 case BIT_XOR_EXPR:
fa2c9034
RB
3177 if (! check_bool_pattern (rhs1, vinfo, stmts)
3178 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
71c92d17 3179 return false;
fa2c9034 3180 break;
71c92d17
JJ
3181
3182 default:
3183 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3184 {
fa2c9034 3185 tree vecitype, comp_vectype;
71c92d17 3186
2f326699
JJ
3187 /* If the comparison can throw, then is_gimple_condexpr will be
3188 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3189 if (stmt_could_throw_p (def_stmt))
3190 return false;
3191
71c92d17
JJ
3192 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3193 if (comp_vectype == NULL_TREE)
3194 return false;
3195
fa2c9034 3196 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
42fd8198 3197 if (mask_type
96592eed 3198 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
42fd8198
IE
3199 return false;
3200
71c92d17
JJ
3201 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3202 {
ef4bddc2 3203 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
71c92d17 3204 tree itype
ab0ef706 3205 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
71c92d17
JJ
3206 vecitype = get_vectype_for_scalar_type (itype);
3207 if (vecitype == NULL_TREE)
3208 return false;
3209 }
3210 else
3211 vecitype = comp_vectype;
96592eed 3212 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
fa2c9034 3213 return false;
71c92d17 3214 }
fa2c9034
RB
3215 else
3216 return false;
3217 break;
71c92d17 3218 }
fa2c9034
RB
3219
3220 bool res = stmts.add (def_stmt);
3221 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3222 gcc_assert (!res);
3223
3224 return true;
71c92d17
JJ
3225}
3226
3227
 3228/* Helper function of adjust_bool_pattern. Add a cast of VAR (defined by a
fa2c9034
RB
 3229 previous stmt, SSA_NAME_DEF_STMT of VAR) to TYPE, appending the cast to
 3230 STMT_INFO's pattern sequence, and return the cast's result. */
71c92d17
JJ
3231
3232static tree
fa2c9034 3233adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
71c92d17 3234{
fa2c9034
RB
3235 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3236 NOP_EXPR, var);
3237 stmt_vec_info patt_vinfo = new_stmt_vec_info (cast_stmt, stmt_info->vinfo);
3238 set_vinfo_for_stmt (cast_stmt, patt_vinfo);
3239 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (type);
3240 append_pattern_def_seq (stmt_info, cast_stmt);
71c92d17
JJ
3241 return gimple_assign_lhs (cast_stmt);
3242}
3243
fa2c9034
RB
3244/* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3245 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3246 type, OUT_TYPE is the desired final integer type of the whole pattern.
 3247 STMT_INFO is the info of the pattern root, with which pattern stmts
 3248 should be associated. DEFS is a map of pattern defs. */
71c92d17 3249
fa2c9034
RB
3250static void
3251adjust_bool_pattern (tree var, tree out_type,
3252 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
71c92d17 3253{
355fe088 3254 gimple *stmt = SSA_NAME_DEF_STMT (var);
71c92d17
JJ
3255 enum tree_code rhs_code, def_rhs_code;
3256 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3257 location_t loc;
355fe088 3258 gimple *pattern_stmt, *def_stmt;
fa2c9034 3259 tree trueval = NULL_TREE;
71c92d17
JJ
3260
3261 rhs1 = gimple_assign_rhs1 (stmt);
3262 rhs2 = gimple_assign_rhs2 (stmt);
3263 rhs_code = gimple_assign_rhs_code (stmt);
3264 loc = gimple_location (stmt);
3265 switch (rhs_code)
3266 {
3267 case SSA_NAME:
3268 CASE_CONVERT:
fa2c9034 3269 irhs1 = *defs.get (rhs1);
71c92d17
JJ
3270 itype = TREE_TYPE (irhs1);
3271 pattern_stmt
0d0e4a03
JJ
3272 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3273 SSA_NAME, irhs1);
71c92d17
JJ
3274 break;
3275
3276 case BIT_NOT_EXPR:
fa2c9034 3277 irhs1 = *defs.get (rhs1);
71c92d17
JJ
3278 itype = TREE_TYPE (irhs1);
3279 pattern_stmt
0d0e4a03
JJ
3280 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3281 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
71c92d17
JJ
3282 break;
3283
3284 case BIT_AND_EXPR:
3285 /* Try to optimize x = y & (a < b ? 1 : 0); into
3286 x = (a < b ? y : 0);
3287
3288 E.g. for:
3289 bool a_b, b_b, c_b;
3290 TYPE d_T;
3291
3292 S1 a_b = x1 CMP1 y1;
3293 S2 b_b = x2 CMP2 y2;
3294 S3 c_b = a_b & b_b;
3295 S4 d_T = (TYPE) c_b;
3296
3297 we would normally emit:
3298
3299 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3300 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3301 S3' c_T = a_T & b_T;
3302 S4' d_T = c_T;
3303
3304 but we can save one stmt by using the
3305 result of one of the COND_EXPRs in the other COND_EXPR and leave
3306 BIT_AND_EXPR stmt out:
3307
3308 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3309 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3310 S4' f_T = c_T;
3311
3312 At least when VEC_COND_EXPR is implemented using masks
3313 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
3314 computes the comparison masks and ands it, in one case with
3315 all ones vector, in the other case with a vector register.
3316 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3317 often more expensive. */
3318 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3319 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3320 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3321 {
fa2c9034 3322 irhs1 = *defs.get (rhs1);
71c92d17 3323 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
71c92d17
JJ
3324 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3325 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3326 {
fa2c9034
RB
3327 rhs_code = def_rhs_code;
3328 rhs1 = def_rhs1;
3329 rhs2 = gimple_assign_rhs2 (def_stmt);
3330 trueval = irhs1;
3331 goto do_compare;
71c92d17
JJ
3332 }
3333 else
fa2c9034 3334 irhs2 = *defs.get (rhs2);
71c92d17
JJ
3335 goto and_ior_xor;
3336 }
3337 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3338 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3339 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3340 {
fa2c9034 3341 irhs2 = *defs.get (rhs2);
71c92d17 3342 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
71c92d17
JJ
3343 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3344 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3345 {
fa2c9034
RB
3346 rhs_code = def_rhs_code;
3347 rhs1 = def_rhs1;
3348 rhs2 = gimple_assign_rhs2 (def_stmt);
3349 trueval = irhs2;
3350 goto do_compare;
71c92d17
JJ
3351 }
3352 else
fa2c9034 3353 irhs1 = *defs.get (rhs1);
71c92d17
JJ
3354 goto and_ior_xor;
3355 }
3356 /* FALLTHRU */
3357 case BIT_IOR_EXPR:
3358 case BIT_XOR_EXPR:
fa2c9034
RB
3359 irhs1 = *defs.get (rhs1);
3360 irhs2 = *defs.get (rhs2);
71c92d17
JJ
3361 and_ior_xor:
3362 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3363 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3364 {
3365 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3366 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3367 int out_prec = TYPE_PRECISION (out_type);
3368 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
fa2c9034
RB
3369 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3370 stmt_info);
71c92d17 3371 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
fa2c9034
RB
3372 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3373 stmt_info);
71c92d17
JJ
3374 else
3375 {
fa2c9034
RB
3376 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3377 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
71c92d17
JJ
3378 }
3379 }
3380 itype = TREE_TYPE (irhs1);
3381 pattern_stmt
0d0e4a03
JJ
3382 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3383 rhs_code, irhs1, irhs2);
71c92d17
JJ
3384 break;
3385
3386 default:
fa2c9034 3387 do_compare:
71c92d17
JJ
3388 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3389 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
e6a21dd2
JJ
3390 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3391 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3392 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
71c92d17 3393 {
ef4bddc2 3394 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
71c92d17 3395 itype
ab0ef706 3396 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
71c92d17
JJ
3397 }
3398 else
3399 itype = TREE_TYPE (rhs1);
3400 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3401 if (trueval == NULL_TREE)
3402 trueval = build_int_cst (itype, 1);
3403 else
3404 gcc_checking_assert (useless_type_conversion_p (itype,
3405 TREE_TYPE (trueval)));
3406 pattern_stmt
0d0e4a03
JJ
3407 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3408 COND_EXPR, cond_expr, trueval,
3409 build_int_cst (itype, 0));
71c92d17
JJ
3410 break;
3411 }
3412
71c92d17 3413 gimple_set_location (pattern_stmt, loc);
fa2c9034
RB
3414 /* ??? Why does vect_mark_pattern_stmts set the vector type on all
3415 pattern def seq stmts instead of just letting auto-detection do
3416 its work? */
3417 stmt_vec_info patt_vinfo = new_stmt_vec_info (pattern_stmt, stmt_info->vinfo);
3418 set_vinfo_for_stmt (pattern_stmt, patt_vinfo);
3419 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (itype);
3420 append_pattern_def_seq (stmt_info, pattern_stmt);
3421 defs.put (var, gimple_assign_lhs (pattern_stmt));
3422}

/* Comparison function to qsort a vector of gimple stmts after UID.  */

static int
sort_after_uid (const void *p1, const void *p2)
{
  const gimple *stmt1 = *(const gimple * const *)p1;
  const gimple *stmt2 = *(const gimple * const *)p2;
  return gimple_uid (stmt1) - gimple_uid (stmt2);
}

/* Create pattern stmts for all stmts participating in the bool pattern
   specified by BOOL_STMT_SET and its root STMT with the desired type
   OUT_TYPE.  Return the def of the pattern root.  */

static tree
adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
		   tree out_type, gimple *stmt)
{
  /* Gather original stmts in the bool pattern in their order of appearance
     in the IL.  */
  auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
  for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
       i != bool_stmt_set.end (); ++i)
    bool_stmts.quick_push (*i);
  bool_stmts.qsort (sort_after_uid);

  /* Now process them in that order, producing pattern stmts.  */
  hash_map <tree, tree> defs;
  for (unsigned i = 0; i < bool_stmts.length (); ++i)
    adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
			 out_type, vinfo_for_stmt (stmt), defs);

  /* Pop the last pattern seq stmt and install it as pattern root for STMT.  */
  gimple *pattern_stmt
    = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
  return gimple_assign_lhs (pattern_stmt);
}
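
/* Note on the ordering above (an explanatory sketch, not from the original
   comments): the hash_set iterates in an unspecified order, while
   adjust_bool_pattern expects the pattern defs of a stmt's operands to be
   present in DEFS before the stmt itself is processed.  Sorting by UID
   restores the IL order, so for  S3  c_b = a_b & b_b;  the defining stmts
   of a_b and b_b are always rewritten first.  */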

/* Helper for search_type_for_mask.  */

static tree
search_type_for_mask_1 (tree var, vec_info *vinfo,
			hash_map<gimple *, tree> &cache)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  tree rhs1;
  enum tree_code rhs_code;
  tree res = NULL_TREE, res2;

  if (TREE_CODE (var) != SSA_NAME)
    return NULL_TREE;

  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
    return NULL_TREE;

  if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
    return NULL_TREE;

  if (dt != vect_internal_def)
    return NULL_TREE;

  if (!is_gimple_assign (def_stmt))
    return NULL_TREE;

  tree *c = cache.get (def_stmt);
  if (c)
    return *c;

  rhs_code = gimple_assign_rhs_code (def_stmt);
  rhs1 = gimple_assign_rhs1 (def_stmt);

  switch (rhs_code)
    {
    case SSA_NAME:
    case BIT_NOT_EXPR:
    CASE_CONVERT:
      res = search_type_for_mask_1 (rhs1, vinfo, cache);
      break;

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      res = search_type_for_mask_1 (rhs1, vinfo, cache);
      res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
				     cache);
      if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
	res = res2;
      break;

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
	{
	  tree comp_vectype, mask_type;

	  if (TREE_CODE (TREE_TYPE (rhs1)) == BOOLEAN_TYPE)
	    {
	      res = search_type_for_mask_1 (rhs1, vinfo, cache);
	      res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
					     vinfo, cache);
	      if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
		res = res2;
	      break;
	    }

	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
	  if (comp_vectype == NULL_TREE)
	    {
	      res = NULL_TREE;
	      break;
	    }

	  mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
	  if (!mask_type
	      || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
	    {
	      res = NULL_TREE;
	      break;
	    }

	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
	      || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
	    {
	      machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
	      res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	    }
	  else
	    res = TREE_TYPE (rhs1);
	}
    }

  cache.put (def_stmt, res);
  return res;
}

/* Return the proper type for converting bool VAR into
   an integer value or NULL_TREE if no such type exists.
   The type is chosen so that the converted value has the
   same number of elements as VAR's vector type.  */

static tree
search_type_for_mask (tree var, vec_info *vinfo)
{
  hash_map<gimple *, tree> cache;
  return search_type_for_mask_1 (var, vinfo, cache);
}
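
/* For instance (an assumed example, not from the original comments): on a
   typical LP64 target,  m_1 = i_4 > i_5  with int operands yields a 32-bit
   unsigned type, while  m_2 = d_6 < d_7  with double operands yields a
   64-bit one, so the two masks differ in precision and need a conversion
   before they can be combined; see vect_recog_mask_conversion_pattern
   below.  */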

/* Function vect_recog_bool_pattern

   Try to find a pattern like the following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.  Or a similar pattern
   ending in

     S6  f_Y = e_b ? r_Y : s_Y;

   as results from if-conversion of a complex condition.

   Input:

   * LAST_STMT: A stmt at the end from which the pattern
		search begins, i.e. cast of a bool to
		an integer type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

   Assuming the size of TYPE is the same as the size of all comparisons
   (otherwise some casts would be added where needed), for the above
   sequence we create the following related pattern stmts:
     S1'  a_T = x1 CMP1 y1 ? 1 : 0;
     S3'  c_T = x2 CMP2 y2 ? a_T : 0;
     S4'  d_T = x3 CMP3 y3 ? 1 : 0;
     S5'  e_T = c_T | d_T;
     S6'  f_T = e_T;

   Instead of the above S3' we could emit:
     S2'  b_T = x2 CMP2 y2 ? 1 : 0;
     S3'  c_T = a_T | b_T;
   but the above is more efficient.  */

static gimple *
vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
			 tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  stmt_vec_info new_stmt_info;
  vec_info *vinfo = stmt_vinfo->vinfo;
  gimple *pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);

  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
    return NULL;

  hash_set<gimple *> bool_stmts;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  if (CONVERT_EXPR_CODE_P (rhs_code))
    {
      if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
	return NULL;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      if (check_bool_pattern (var, vinfo, bool_stmts))
	{
	  rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	    pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
	  else
	    pattern_stmt
	      = gimple_build_assign (lhs, NOP_EXPR, rhs);
	}
      else
	{
	  tree type = search_type_for_mask (var, vinfo);
	  tree cst0, cst1, tmp;

	  if (!type)
	    return NULL;

	  /* We may directly use cond with narrowed type to avoid
	     multiple cond exprs with following result packing and
	     perform single cond with packed mask instead.  In case
	     of widening we better make cond first and then extract
	     results.  */
	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
	    type = TREE_TYPE (lhs);

	  cst0 = build_int_cst (type, 0);
	  cst1 = build_int_cst (type, 1);
	  tmp = vect_recog_temp_ssa_var (type, NULL);
	  pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);

	  if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
	    {
	      tree new_vectype = get_vectype_for_scalar_type (type);
	      new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
	      set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
	      STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
	      new_pattern_def_seq (stmt_vinfo, pattern_stmt);

	      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	      pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
	    }
	}

      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == COND_EXPR
	   && TREE_CODE (var) == SSA_NAME)
    {
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      /* Build a scalar type for the boolean result that when
	 vectorized matches the vector type of the result in
	 size and number of elements.  */
      unsigned prec
	= wi::udiv_trunc (TYPE_SIZE (vectype),
			  TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
      tree type
	= build_nonstandard_integer_type (prec,
					  TYPE_UNSIGNED (TREE_TYPE (var)));
      if (get_vectype_for_scalar_type (type) == NULL_TREE)
	return NULL;

      if (!check_bool_pattern (var, vinfo, bool_stmts))
	return NULL;

      rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);

      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt
	= gimple_build_assign (lhs, COND_EXPR,
			       build2 (NE_EXPR, boolean_type_node,
				       rhs, build_int_cst (type, 0)),
			       gimple_assign_rhs2 (last_stmt),
			       gimple_assign_rhs3 (last_stmt));
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
	   && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
	return NULL;

      if (check_bool_pattern (var, vinfo, bool_stmts))
	rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
      else
	{
	  tree type = search_type_for_mask (var, vinfo);
	  tree cst0, cst1, new_vectype;

	  if (!type)
	    return NULL;

	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
	    type = TREE_TYPE (vectype);

	  cst0 = build_int_cst (type, 0);
	  cst1 = build_int_cst (type, 1);
	  new_vectype = get_vectype_for_scalar_type (type);

	  rhs = vect_recog_temp_ssa_var (type, NULL);
	  pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);

	  pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
	  set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
	  STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
	  append_pattern_def_seq (stmt_vinfo, pattern_stmt);
	}

      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	{
	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
	  append_pattern_def_seq (stmt_vinfo, cast_stmt);
	  rhs = rhs2;
	}
      pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
	= STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_bool_pattern: detected:\n");
      return pattern_stmt;
    }
  else
    return NULL;
}
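
/* Illustrative note for the COND_EXPR case above (an assumed example, not
   from the original comments): an if-converted

     S6  f_Y = e_b ? r_Y : s_Y;

   with e_b defined by a supported bool chain is rewritten roughly as

     S6'   e_T = <bool chain as integer pattern stmts>;
     S6''  f_Y = e_T != 0 ? r_Y : s_Y;

   where e_T's precision is chosen so that the mask vector has as many
   elements as the result vector.  */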

/* A helper for vect_recog_mask_conversion_pattern.  Build a
   conversion of MASK to a type suitable for masking VECTYPE.
   The built statement gets the required vectype and is appended
   to STMT_VINFO's pattern sequence.

   Return the converted mask.  */

static tree
build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
		       vec_info *vinfo)
{
  gimple *stmt;
  tree masktype, tmp;
  stmt_vec_info new_stmt_info;

  masktype = build_same_sized_truth_vector_type (vectype);
  tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
  stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
  new_stmt_info = new_stmt_vec_info (stmt, vinfo);
  set_vinfo_for_stmt (stmt, new_stmt_info);
  STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
  append_pattern_def_seq (stmt_vinfo, stmt);

  return tmp;
}
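
/* For example (an illustrative sketch, not from the original sources): with
   VECTYPE a 4-element int vector, build_same_sized_truth_vector_type yields
   the corresponding 4-element boolean vector type, and the appended scalar
   conversion stmt is the analogue of the S3''/S4'' stmts shown in the
   comment of the following function.  */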

/* Function vect_recog_mask_conversion_pattern

   Try to find statements which require boolean type
   conversion.  Additional conversion statements are
   added to handle such cases.  For example:

   bool m_1, m_2, m_3;
   int i_4, i_5;
   double d_6, d_7;
   char c_1, c_2, c_3;

   S1   m_1 = i_4 > i_5;
   S2   m_2 = d_6 < d_7;
   S3   m_3 = m_1 & m_2;
   S4   c_1 = m_3 ? c_2 : c_3;

   Will be transformed into:

   S1   m_1 = i_4 > i_5;
   S2   m_2 = d_6 < d_7;
   S3'' m_2' = (_Bool[bitsize=32])m_2
   S3'  m_3' = m_1 & m_2';
   S4'' m_3'' = (_Bool[bitsize=8])m_3'
   S4'  c_1' = m_3'' ? c_2 : c_3;  */

static gimple *
vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
				    tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  enum tree_code rhs_code;
  tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
  tree vectype1, vectype2;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  stmt_vec_info pattern_stmt_info;
  vec_info *vinfo = stmt_vinfo->vinfo;
  gimple *pattern_stmt;

  /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion.  */
  if (is_gimple_call (last_stmt)
      && gimple_call_internal_p (last_stmt)
      && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
	  || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
    {
      bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);

      if (load)
	{
	  lhs = gimple_call_lhs (last_stmt);
	  vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
	}
      else
	{
	  rhs2 = gimple_call_arg (last_stmt, 3);
	  vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
	}

      rhs1 = gimple_call_arg (last_stmt, 2);
      rhs1_type = search_type_for_mask (rhs1, vinfo);
      if (!rhs1_type)
	return NULL;
      vectype2 = get_mask_type_for_scalar_type (rhs1_type);

      if (!vectype1 || !vectype2
	  || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
	return NULL;

      tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);

      if (load)
	{
	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  pattern_stmt
	    = gimple_build_call_internal (IFN_MASK_LOAD, 3,
					  gimple_call_arg (last_stmt, 0),
					  gimple_call_arg (last_stmt, 1),
					  tmp);
	  gimple_call_set_lhs (pattern_stmt, lhs);
	}
      else
	pattern_stmt
	  = gimple_build_call_internal (IFN_MASK_STORE, 4,
					gimple_call_arg (last_stmt, 0),
					gimple_call_arg (last_stmt, 1),
					tmp,
					gimple_call_arg (last_stmt, 3));

      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
	= STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;

      *type_out = vectype1;
      *type_in = vectype1;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_mask_conversion_pattern: detected:\n");

      return pattern_stmt;
    }

  if (!is_gimple_assign (last_stmt))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  rhs1 = gimple_assign_rhs1 (last_stmt);
  rhs_code = gimple_assign_rhs_code (last_stmt);

  /* Check for cond expression requiring mask conversion.  */
  if (rhs_code == COND_EXPR)
    {
      /* vect_recog_mixed_size_cond_pattern could apply.
	 Do nothing then.  */
      if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
	return NULL;

      vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));

      if (TREE_CODE (rhs1) == SSA_NAME)
	{
	  rhs1_type = search_type_for_mask (rhs1, vinfo);
	  if (!rhs1_type)
	    return NULL;
	}
      else if (COMPARISON_CLASS_P (rhs1))
	rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
      else
	return NULL;

      vectype2 = get_mask_type_for_scalar_type (rhs1_type);

      if (!vectype1 || !vectype2
	  || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
	return NULL;

      /* If rhs1 is a comparison we need to move it into a
	 separate statement.  */
      if (TREE_CODE (rhs1) != SSA_NAME)
	{
	  tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
	  pattern_stmt = gimple_build_assign (tmp, rhs1);
	  rhs1 = tmp;

	  pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
	  set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
	  STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
	  append_pattern_def_seq (stmt_vinfo, pattern_stmt);
	}

      tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);

      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
					  gimple_assign_rhs2 (last_stmt),
					  gimple_assign_rhs3 (last_stmt));

      *type_out = vectype1;
      *type_in = vectype1;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_mask_conversion_pattern: detected:\n");

      return pattern_stmt;
    }

  /* Now check for binary boolean operations requiring conversion of
     one of the operands.  */
  if (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE)
    return NULL;

  if (rhs_code != BIT_IOR_EXPR
      && rhs_code != BIT_XOR_EXPR
      && rhs_code != BIT_AND_EXPR
      && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
    return NULL;

  rhs2 = gimple_assign_rhs2 (last_stmt);

  rhs1_type = search_type_for_mask (rhs1, vinfo);
  rhs2_type = search_type_for_mask (rhs2, vinfo);

  if (!rhs1_type || !rhs2_type
      || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
    return NULL;

  if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
    {
      vectype1 = get_mask_type_for_scalar_type (rhs1_type);
      if (!vectype1)
	return NULL;
      rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
    }
  else
    {
      vectype1 = get_mask_type_for_scalar_type (rhs2_type);
      if (!vectype1)
	return NULL;
      rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
    }

  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
  pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);

  *type_out = vectype1;
  *type_in = vectype1;
  stmts->safe_push (last_stmt);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_mask_conversion_pattern: detected:\n");

  return pattern_stmt;
}
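
/* Sketch of the binary-op branch above (an assumed example, not from the
   original comments): in  S3  m_3 = m_1 & m_2;  with m_1 from an int
   comparison (32-bit mask elements) and m_2 from a double comparison
   (64-bit mask elements), rhs1_type has the smaller precision, so m_2 is
   converted to m_1's mask type and the & is re-emitted on the converted
   operands, producing the S3''/S3' stmts of the function comment.  */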

/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
			 tree pattern_vectype)
{
  stmt_vec_info pattern_stmt_info, def_stmt_info;
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
  vec_info *vinfo = orig_stmt_info->vinfo;
  gimple *def_stmt;

  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
    = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
  if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
    {
      gimple_stmt_iterator si;
      for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
	   !gsi_end_p (si); gsi_next (&si))
	{
	  def_stmt = gsi_stmt (si);
	  def_stmt_info = vinfo_for_stmt (def_stmt);
	  if (def_stmt_info == NULL)
	    {
	      def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_info);
	    }
	  gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
	  STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
	  STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
	  if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
	    STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
	}
    }
}

/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
	computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type.  If 'TYPE_IN' is already a vector type, then this
   indicates that target support had already been checked by
   PATTERN_RECOG_FUNC.  If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC,
   we check that it fits to the available target pattern.

   This function also does some bookkeeping, as explained in the
   documentation for vect_pattern_recog.  */

static bool
vect_pattern_recog_1 (vect_recog_func *recog_func,
		      gimple_stmt_iterator si,
		      vec<gimple *> *stmts_to_replace)
{
  gimple *stmt = gsi_stmt (si), *pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple *next;

  stmts_to_replace->truncate (0);
  stmts_to_replace->quick_push (stmt);
  pattern_stmt = recog_func->fn (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return false;

  stmt = stmts_to_replace->last ();
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_BOOLEAN_TYPE_P (type_in)
      || VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
	 recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support.  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
	return false;
      if (type_out)
	type_out = get_vectype_for_scalar_type (type_out);
      else
	type_out = type_in;
      if (!type_out)
	return false;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
	code = gimple_assign_rhs_code (pattern_stmt);
      else
	{
	  gcc_assert (is_gimple_call (pattern_stmt));
	  code = CALL_EXPR;
	}

      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
	  || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
	  || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
	return false;
    }

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "%s pattern recognized: ", recog_func->name);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
      if (next == stmt)
	LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  */
  for (i = 0; stmts_to_replace->iterate (i, &stmt)
	      && (unsigned) i < (stmts_to_replace->length () - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "additional pattern stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
	}

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }

  return true;
}

/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
	computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
	that provides the same functionality and that can be vectorized.  We
	also record some information in the struct_stmt_info of the relevant
	stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

	 stmt                          in_pattern_p  related_stmt  vec_stmt
	 S1: a_i = ....                     -             -           -
	 S2: a_2 = ..use(a_i)..             -             -           -
	 S3: a_1 = ..use(a_2)..             -             -           -
	 S4: a_0 = ..use(a_1)..             -             -           -
	 S5: ... = ..use(a_0)..             -             -           -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

				       in_pattern_p  related_stmt  vec_stmt
	 S1: a_i = ....                     -             -           -
	 S2: a_2 = ..use(a_i)..             -             -           -
	 S3: a_1 = ..use(a_2)..             -             -           -
	 S4: a_0 = ..use(a_1)..           true            S6          -
	  '---> S6: a_new = ....            -             S4          -
	 S5: ... = ..use(a_0)..             -             -           -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

				       in_pattern_p  related_stmt  vec_stmt
	 S1: a_i = ....                     -             -           -
	 S2: a_2 = ..use(a_i)..             -             -           -
	 S3: a_1 = ..use(a_2)..             -             -           -
       > VS6: va_new = ....                 -             -           -
	 S4: a_0 = ..use(a_1)..           true            S6         VS6
	  '---> S6: a_new = ....            -             S4         VS6
       > VS5: ... = ..vuse(va_new)..        -             -           -
	 S5: ... = ..use(a_0)..             -             -           -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

	 VS6: va_new = ....
	 VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statement, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
	   '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
	   '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */
4326
4327void
310213d4 4328vect_pattern_recog (vec_info *vinfo)
20f06221 4329{
f5709183 4330 struct loop *loop;
772e61e1 4331 basic_block *bbs;
f5709183 4332 unsigned int nbbs;
726a989a 4333 gimple_stmt_iterator si;
20f06221 4334 unsigned int i, j;
355fe088
TS
4335 auto_vec<gimple *, 1> stmts_to_replace;
4336 gimple *stmt;
20f06221 4337
73fbfcad 4338 if (dump_enabled_p ())
78c60e3d 4339 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4340 "=== vect_pattern_recog ===\n");
20f06221 4341
310213d4 4342 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
f5709183
IR
4343 {
4344 loop = LOOP_VINFO_LOOP (loop_vinfo);
4345 bbs = LOOP_VINFO_BBS (loop_vinfo);
4346 nbbs = loop->num_nodes;
61d371eb
RB
4347
4348 /* Scan through the loop stmts, applying the pattern recognition
4349 functions starting at each stmt visited: */
4350 for (i = 0; i < nbbs; i++)
4351 {
4352 basic_block bb = bbs[i];
4353 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4354 {
4355 /* Scan over all generic vect_recog_xxx_pattern functions. */
4356 for (j = 0; j < NUM_PATTERNS; j++)
0bee0ef9
RB
4357 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4358 &stmts_to_replace))
4359 break;
61d371eb
RB
4360 }
4361 }
f5709183
IR
4362 }
4363 else
4364 {
61d371eb
RB
4365 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4366 for (si = bb_vinfo->region_begin;
4367 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4368 {
4369 if ((stmt = gsi_stmt (si))
f5709183
IR
4370 && vinfo_for_stmt (stmt)
4371 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
61d371eb 4372 continue;
f5709183 4373
61d371eb
RB
4374 /* Scan over all generic vect_recog_xxx_pattern functions. */
4375 for (j = 0; j < NUM_PATTERNS; j++)
0bee0ef9
RB
4376 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4377 &stmts_to_replace))
4378 break;
61d371eb 4379 }
20f06221
DN
4380 }
4381}
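
/* Illustrative sketch (hypothetical, not part of the original file): a new
   recognizer would follow the same contract as the functions above,

     static gimple *
     vect_recog_my_pattern (vec<gimple *> *stmts, tree *type_in,
			    tree *type_out)
     {
       gimple *last_stmt = stmts->pop ();
       // Match the idiom rooted at LAST_STMT.  On success, set *type_in
       // and *type_out, push LAST_STMT back onto STMTS and return the
       // new pattern stmt; otherwise return NULL.
       return NULL;
     }

   and would be entered into the vect_vect_recog_func_ptrs table (with
   NUM_PATTERNS updated) so that vect_pattern_recog tries it.  */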