]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-patterns.c
re PR target/49244 (__sync or __atomic builtins will not emit 'lock bts/btr/btc')
[thirdparty/gcc.git] / gcc / tree-vect-patterns.c
CommitLineData
20f06221 1/* Analysis Utilities for Loop Vectorization.
818ab71a 2 Copyright (C) 2006-2016 Free Software Foundation, Inc.
20f06221
DN
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9dcd6f09 9Software Foundation; either version 3, or (at your option) any later
20f06221
DN
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
9dcd6f09
NC
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20f06221
DN
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
c7131fb2 24#include "backend.h"
957060b5 25#include "rtl.h"
20f06221 26#include "tree.h"
c7131fb2 27#include "gimple.h"
c7131fb2 28#include "ssa.h"
957060b5
AM
29#include "expmed.h"
30#include "optabs-tree.h"
31#include "insn-config.h"
32#include "recog.h" /* FIXME: for insn_data */
40e23961 33#include "fold-const.h"
d8a2d370 34#include "stor-layout.h"
2fb9a547 35#include "tree-eh.h"
45b0be94 36#include "gimplify.h"
5be5c238 37#include "gimple-iterator.h"
20f06221 38#include "cfgloop.h"
20f06221 39#include "tree-vectorizer.h"
7ee2468b 40#include "dumpfile.h"
9b2b7279 41#include "builtins.h"
b4e5bc47 42#include "internal-fn.h"
7a31e5ef 43#include "case-cfn-macros.h"
20f06221 44
20f06221 45/* Pattern recognition functions */
355fe088 46static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
51312233 47 tree *);
355fe088 48static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
51312233 49 tree *);
355fe088 50static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
51312233 51 tree *);
355fe088 52static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
79d652a5 53 tree *);
355fe088
TS
54static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
55static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
1107f3ae 56 tree *);
355fe088 57static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
36ba4aae 58 tree *, tree *);
355fe088
TS
59static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
60static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
732a0ad3 61 tree *, tree *);
355fe088 62static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
079c527f 63 tree *, tree *);
47486460 64
355fe088 65static gimple *vect_recog_mult_pattern (vec<gimple *> *,
47486460
VK
66 tree *, tree *);
67
355fe088 68static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
69d2aade 69 tree *, tree *);
355fe088 70static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
e6f5c25d 71static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *, tree *);
0bee0ef9
RB
72
73struct vect_recog_func
74{
75 vect_recog_func_ptr fn;
76 const char *name;
77};
5b723b68
RB
78
79/* Note that ordering matters - the first pattern matching on a stmt
80 is taken which means usually the more complex one needs to preceed
81 the less comples onex (widen_sum only after dot_prod or sad for example). */
0bee0ef9
RB
82static vect_recog_func vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
83 { vect_recog_widen_mult_pattern, "widen_mult" },
0bee0ef9
RB
84 { vect_recog_dot_prod_pattern, "dot_prod" },
85 { vect_recog_sad_pattern, "sad" },
5b723b68 86 { vect_recog_widen_sum_pattern, "widen_sum" },
0bee0ef9
RB
87 { vect_recog_pow_pattern, "pow" },
88 { vect_recog_widen_shift_pattern, "widen_shift" },
89 { vect_recog_over_widening_pattern, "over_widening" },
90 { vect_recog_rotate_pattern, "rotate" },
91 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
92 { vect_recog_divmod_pattern, "divmod" },
93 { vect_recog_mult_pattern, "mult" },
94 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
95 { vect_recog_bool_pattern, "bool" },
96 { vect_recog_mask_conversion_pattern, "mask_conversion" }
97};
20f06221 98
083481d8 99static inline void
355fe088 100append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
083481d8 101{
a1a6c5b2
JJ
102 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
103 stmt);
083481d8
JJ
104}
105
106static inline void
355fe088 107new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
083481d8
JJ
108{
109 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
110 append_pattern_def_seq (stmt_info, stmt);
111}
112
f71cf56a
UW
113/* Check whether STMT2 is in the same loop or basic block as STMT1.
114 Which of the two applies depends on whether we're currently doing
115 loop-based or basic-block-based vectorization, as determined by
116 the vinfo_for_stmt for STMT1 (which must be defined).
117
118 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
119 to be defined as well. */
120
121static bool
355fe088 122vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
f71cf56a
UW
123{
124 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
61d371eb 125 return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
f71cf56a
UW
126}
127
9a7a4398
UW
128/* If the LHS of DEF_STMT has a single use, and that statement is
129 in the same loop or basic block, return it. */
130
355fe088
TS
131static gimple *
132vect_single_imm_use (gimple *def_stmt)
9a7a4398
UW
133{
134 tree lhs = gimple_assign_lhs (def_stmt);
135 use_operand_p use_p;
355fe088 136 gimple *use_stmt;
9a7a4398
UW
137
138 if (!single_imm_use (lhs, &use_p, &use_stmt))
139 return NULL;
140
141 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
142 return NULL;
143
144 return use_stmt;
145}
146
bc4fb355 147/* Check whether NAME, an ssa-name used in USE_STMT,
79d652a5 148 is a result of a type promotion, such that:
20f06221 149 DEF_STMT: NAME = NOP (name0)
383d9c83
IR
150 If CHECK_SIGN is TRUE, check that either both types are signed or both are
151 unsigned. */
20f06221
DN
152
153static bool
355fe088
TS
154type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
155 tree *orig_type, gimple **def_stmt, bool *promotion)
20f06221 156{
355fe088 157 gimple *dummy_gimple;
20f06221 158 stmt_vec_info stmt_vinfo;
20f06221
DN
159 tree type = TREE_TYPE (name);
160 tree oprnd0;
161 enum vect_def_type dt;
20f06221
DN
162
163 stmt_vinfo = vinfo_for_stmt (use_stmt);
81c40241 164 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
20f06221
DN
165 return false;
166
8644a673
IR
167 if (dt != vect_internal_def
168 && dt != vect_external_def && dt != vect_constant_def)
20f06221
DN
169 return false;
170
bc4fb355 171 if (!*def_stmt)
20f06221
DN
172 return false;
173
2a2b8f64
JJ
174 if (dt == vect_internal_def)
175 {
176 stmt_vec_info def_vinfo = vinfo_for_stmt (*def_stmt);
177 if (STMT_VINFO_IN_PATTERN_P (def_vinfo))
178 return false;
179 }
180
726a989a 181 if (!is_gimple_assign (*def_stmt))
20f06221
DN
182 return false;
183
bc4fb355 184 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
20f06221
DN
185 return false;
186
726a989a 187 oprnd0 = gimple_assign_rhs1 (*def_stmt);
20f06221 188
bc4fb355
IR
189 *orig_type = TREE_TYPE (oprnd0);
190 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
191 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
192 return false;
193
194 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
195 *promotion = true;
bc4fb355 196 else
79d652a5 197 *promotion = false;
20f06221 198
81c40241 199 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
20f06221
DN
200 return false;
201
20f06221
DN
202 return true;
203}
204
726a989a
RB
205/* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
206 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
207
208static tree
355fe088 209vect_recog_temp_ssa_var (tree type, gimple *stmt)
726a989a 210{
83d5977e 211 return make_temp_ssa_name (type, stmt, "patt");
726a989a 212}
20f06221
DN
213
214/* Function vect_recog_dot_prod_pattern
215
216 Try to find the following pattern:
217
218 type x_t, y_t;
219 TYPE1 prod;
220 TYPE2 sum = init;
221 loop:
222 sum_0 = phi <init, sum_1>
223 S1 x_t = ...
224 S2 y_t = ...
225 S3 x_T = (TYPE1) x_t;
226 S4 y_T = (TYPE1) y_t;
227 S5 prod = x_T * y_T;
228 [S6 prod = (TYPE2) prod; #optional]
229 S7 sum_1 = prod + sum_0;
230
b8698a0f
L
231 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
232 same size of 'TYPE1' or bigger. This is a special case of a reduction
20f06221 233 computation.
b8698a0f 234
20f06221
DN
235 Input:
236
51312233
IR
237 * STMTS: Contains a stmt from which the pattern search begins. In the
238 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
239 will be detected.
20f06221
DN
240
241 Output:
242
243 * TYPE_IN: The type of the input arguments to the pattern.
244
245 * TYPE_OUT: The type of the output of this pattern.
246
247 * Return value: A new stmt that will be used to replace the sequence of
248 stmts that constitute the pattern. In this case it will be:
249 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
d29de1bf
DN
250
251 Note: The dot-prod idiom is a widening reduction pattern that is
252 vectorized without preserving all the intermediate results. It
253 produces only N/2 (widened) results (by summing up pairs of
254 intermediate results) rather than all N results. Therefore, we
255 cannot allow this pattern when we want to get all the results and in
256 the correct order (as is the case when this computation is in an
257 inner-loop nested in an outer-loop that us being vectorized). */
20f06221 258
355fe088
TS
259static gimple *
260vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
51312233 261 tree *type_out)
20f06221 262{
355fe088 263 gimple *stmt, *last_stmt = (*stmts)[0];
20f06221
DN
264 tree oprnd0, oprnd1;
265 tree oprnd00, oprnd01;
51312233 266 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
20f06221 267 tree type, half_type;
355fe088 268 gimple *pattern_stmt;
20f06221 269 tree prod_type;
d29de1bf 270 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 271 struct loop *loop;
f471fe72 272 tree var;
bc4fb355 273 bool promotion;
20f06221 274
f5709183
IR
275 if (!loop_info)
276 return NULL;
277
278 loop = LOOP_VINFO_LOOP (loop_info);
279
328dc477
RB
280 /* We don't allow changing the order of the computation in the inner-loop
281 when doing outer-loop vectorization. */
282 if (loop && nested_in_vect_loop_p (loop, last_stmt))
283 return NULL;
284
51312233 285 if (!is_gimple_assign (last_stmt))
20f06221
DN
286 return NULL;
287
51312233 288 type = gimple_expr_type (last_stmt);
20f06221 289
b8698a0f 290 /* Look for the following pattern
20f06221
DN
291 DX = (TYPE1) X;
292 DY = (TYPE1) Y;
b8698a0f 293 DPROD = DX * DY;
20f06221
DN
294 DDPROD = (TYPE2) DPROD;
295 sum_1 = DDPROD + sum_0;
b8698a0f 296 In which
20f06221
DN
297 - DX is double the size of X
298 - DY is double the size of Y
299 - DX, DY, DPROD all have the same type
300 - sum is the same size of DPROD or bigger
301 - sum has been recognized as a reduction variable.
302
303 This is equivalent to:
304 DPROD = X w* Y; #widen mult
305 sum_1 = DPROD w+ sum_0; #widen summation
306 or
307 DPROD = X w* Y; #widen mult
308 sum_1 = DPROD + sum_0; #summation
309 */
310
311 /* Starting from LAST_STMT, follow the defs of its uses in search
312 of the above pattern. */
313
51312233 314 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
20f06221
DN
315 return NULL;
316
317 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
318 {
319 /* Has been detected as widening-summation? */
320
321 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
726a989a
RB
322 type = gimple_expr_type (stmt);
323 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
20f06221 324 return NULL;
726a989a
RB
325 oprnd0 = gimple_assign_rhs1 (stmt);
326 oprnd1 = gimple_assign_rhs2 (stmt);
20f06221
DN
327 half_type = TREE_TYPE (oprnd0);
328 }
329 else
330 {
355fe088 331 gimple *def_stmt;
20f06221 332
b308d872
RB
333 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
334 && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
335 return NULL;
51312233
IR
336 oprnd0 = gimple_assign_rhs1 (last_stmt);
337 oprnd1 = gimple_assign_rhs2 (last_stmt);
9600efe1
MM
338 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
339 || !types_compatible_p (TREE_TYPE (oprnd1), type))
20f06221 340 return NULL;
51312233 341 stmt = last_stmt;
20f06221 342
bc4fb355 343 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
2a2b8f64
JJ
344 &promotion)
345 && promotion)
20f06221
DN
346 {
347 stmt = def_stmt;
726a989a 348 oprnd0 = gimple_assign_rhs1 (stmt);
20f06221
DN
349 }
350 else
351 half_type = type;
352 }
353
51312233 354 /* So far so good. Since last_stmt was detected as a (summation) reduction,
20f06221
DN
355 we know that oprnd1 is the reduction variable (defined by a loop-header
356 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
357 Left to check that oprnd0 is defined by a (widen_)mult_expr */
ba02d3bc
RG
358 if (TREE_CODE (oprnd0) != SSA_NAME)
359 return NULL;
20f06221
DN
360
361 prod_type = half_type;
362 stmt = SSA_NAME_DEF_STMT (oprnd0);
3cb35c12
CF
363
364 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
75264e61 365 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
3cb35c12
CF
366 return NULL;
367
b8698a0f 368 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
8665227f 369 inside the loop (in case we are analyzing an outer-loop). */
726a989a 370 if (!is_gimple_assign (stmt))
b8698a0f 371 return NULL;
20f06221
DN
372 stmt_vinfo = vinfo_for_stmt (stmt);
373 gcc_assert (stmt_vinfo);
8644a673 374 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
b3130586 375 return NULL;
726a989a 376 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
20f06221
DN
377 return NULL;
378 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
379 {
380 /* Has been detected as a widening multiplication? */
381
382 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
726a989a 383 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
20f06221
DN
384 return NULL;
385 stmt_vinfo = vinfo_for_stmt (stmt);
386 gcc_assert (stmt_vinfo);
8644a673 387 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
726a989a
RB
388 oprnd00 = gimple_assign_rhs1 (stmt);
389 oprnd01 = gimple_assign_rhs2 (stmt);
56f8faae
CH
390 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
391 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
20f06221
DN
392 }
393 else
394 {
395 tree half_type0, half_type1;
355fe088 396 gimple *def_stmt;
20f06221
DN
397 tree oprnd0, oprnd1;
398
726a989a
RB
399 oprnd0 = gimple_assign_rhs1 (stmt);
400 oprnd1 = gimple_assign_rhs2 (stmt);
9600efe1
MM
401 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
402 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
20f06221 403 return NULL;
bc4fb355 404 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
2a2b8f64
JJ
405 &promotion)
406 || !promotion)
20f06221 407 return NULL;
726a989a 408 oprnd00 = gimple_assign_rhs1 (def_stmt);
181f5f3e 409 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
2a2b8f64
JJ
410 &promotion)
411 || !promotion)
20f06221 412 return NULL;
726a989a 413 oprnd01 = gimple_assign_rhs1 (def_stmt);
9600efe1 414 if (!types_compatible_p (half_type0, half_type1))
20f06221
DN
415 return NULL;
416 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
417 return NULL;
418 }
419
420 half_type = TREE_TYPE (oprnd00);
421 *type_in = half_type;
422 *type_out = type;
b8698a0f 423
20f06221 424 /* Pattern detected. Create a stmt to be used to replace the pattern: */
726a989a 425 var = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
426 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
427 oprnd00, oprnd01, oprnd1);
b8698a0f 428
73fbfcad 429 if (dump_enabled_p ())
20f06221 430 {
ccb3ad87 431 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 432 "vect_recog_dot_prod_pattern: detected: ");
ccb3ad87 433 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 434 dump_printf (MSG_NOTE, "\n");
20f06221 435 }
d29de1bf 436
726a989a 437 return pattern_stmt;
20f06221 438}
b8698a0f 439
51312233 440
79d652a5
CH
441/* Function vect_recog_sad_pattern
442
443 Try to find the following Sum of Absolute Difference (SAD) pattern:
444
445 type x_t, y_t;
446 signed TYPE1 diff, abs_diff;
447 TYPE2 sum = init;
448 loop:
449 sum_0 = phi <init, sum_1>
450 S1 x_t = ...
451 S2 y_t = ...
452 S3 x_T = (TYPE1) x_t;
453 S4 y_T = (TYPE1) y_t;
454 S5 diff = x_T - y_T;
455 S6 abs_diff = ABS_EXPR <diff>;
456 [S7 abs_diff = (TYPE2) abs_diff; #optional]
457 S8 sum_1 = abs_diff + sum_0;
458
459 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
460 same size of 'TYPE1' or bigger. This is a special case of a reduction
461 computation.
462
463 Input:
464
465 * STMTS: Contains a stmt from which the pattern search begins. In the
466 example, when this function is called with S8, the pattern
467 {S3,S4,S5,S6,S7,S8} will be detected.
468
469 Output:
470
471 * TYPE_IN: The type of the input arguments to the pattern.
472
473 * TYPE_OUT: The type of the output of this pattern.
474
475 * Return value: A new stmt that will be used to replace the sequence of
476 stmts that constitute the pattern. In this case it will be:
477 SAD_EXPR <x_t, y_t, sum_0>
478 */
479
355fe088
TS
480static gimple *
481vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
79d652a5
CH
482 tree *type_out)
483{
355fe088 484 gimple *last_stmt = (*stmts)[0];
79d652a5
CH
485 tree sad_oprnd0, sad_oprnd1;
486 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
487 tree half_type;
488 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
489 struct loop *loop;
490 bool promotion;
491
492 if (!loop_info)
493 return NULL;
494
495 loop = LOOP_VINFO_LOOP (loop_info);
496
328dc477
RB
497 /* We don't allow changing the order of the computation in the inner-loop
498 when doing outer-loop vectorization. */
499 if (loop && nested_in_vect_loop_p (loop, last_stmt))
500 return NULL;
501
79d652a5
CH
502 if (!is_gimple_assign (last_stmt))
503 return NULL;
504
505 tree sum_type = gimple_expr_type (last_stmt);
506
507 /* Look for the following pattern
508 DX = (TYPE1) X;
509 DY = (TYPE1) Y;
510 DDIFF = DX - DY;
511 DAD = ABS_EXPR <DDIFF>;
512 DDPROD = (TYPE2) DPROD;
513 sum_1 = DAD + sum_0;
514 In which
515 - DX is at least double the size of X
516 - DY is at least double the size of Y
517 - DX, DY, DDIFF, DAD all have the same type
518 - sum is the same size of DAD or bigger
519 - sum has been recognized as a reduction variable.
520
521 This is equivalent to:
522 DDIFF = X w- Y; #widen sub
523 DAD = ABS_EXPR <DDIFF>;
524 sum_1 = DAD w+ sum_0; #widen summation
525 or
526 DDIFF = X w- Y; #widen sub
527 DAD = ABS_EXPR <DDIFF>;
528 sum_1 = DAD + sum_0; #summation
529 */
530
531 /* Starting from LAST_STMT, follow the defs of its uses in search
532 of the above pattern. */
533
534 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
535 return NULL;
536
537 tree plus_oprnd0, plus_oprnd1;
538
539 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
540 {
541 /* Has been detected as widening-summation? */
542
355fe088 543 gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
79d652a5
CH
544 sum_type = gimple_expr_type (stmt);
545 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
546 return NULL;
547 plus_oprnd0 = gimple_assign_rhs1 (stmt);
548 plus_oprnd1 = gimple_assign_rhs2 (stmt);
549 half_type = TREE_TYPE (plus_oprnd0);
550 }
551 else
552 {
355fe088 553 gimple *def_stmt;
79d652a5 554
b308d872
RB
555 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
556 && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
557 return NULL;
79d652a5
CH
558 plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
559 plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
560 if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
561 || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
562 return NULL;
563
564 /* The type conversion could be promotion, demotion,
565 or just signed -> unsigned. */
566 if (type_conversion_p (plus_oprnd0, last_stmt, false,
567 &half_type, &def_stmt, &promotion))
568 plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
569 else
570 half_type = sum_type;
571 }
572
573 /* So far so good. Since last_stmt was detected as a (summation) reduction,
574 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
575 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
576 Then check that plus_oprnd0 is defined by an abs_expr. */
577
578 if (TREE_CODE (plus_oprnd0) != SSA_NAME)
579 return NULL;
580
581 tree abs_type = half_type;
355fe088 582 gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
79d652a5
CH
583
584 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
585 if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
586 return NULL;
587
588 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
589 inside the loop (in case we are analyzing an outer-loop). */
590 if (!is_gimple_assign (abs_stmt))
591 return NULL;
592
593 stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
594 gcc_assert (abs_stmt_vinfo);
595 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
596 return NULL;
597 if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
598 return NULL;
599
600 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
601 if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
602 return NULL;
603 if (TYPE_UNSIGNED (abs_type))
604 return NULL;
605
606 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
607
608 if (TREE_CODE (abs_oprnd) != SSA_NAME)
609 return NULL;
610
355fe088 611 gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
79d652a5
CH
612
613 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
614 if (!gimple_bb (diff_stmt)
615 || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
616 return NULL;
617
618 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
619 inside the loop (in case we are analyzing an outer-loop). */
620 if (!is_gimple_assign (diff_stmt))
621 return NULL;
622
623 stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
624 gcc_assert (diff_stmt_vinfo);
625 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
626 return NULL;
627 if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
628 return NULL;
629
630 tree half_type0, half_type1;
355fe088 631 gimple *def_stmt;
79d652a5
CH
632
633 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
634 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
635
636 if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
637 || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
638 return NULL;
639 if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
640 &half_type0, &def_stmt, &promotion)
641 || !promotion)
642 return NULL;
643 sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
644
645 if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
646 &half_type1, &def_stmt, &promotion)
647 || !promotion)
648 return NULL;
649 sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
650
651 if (!types_compatible_p (half_type0, half_type1))
652 return NULL;
653 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
654 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
655 return NULL;
656
657 *type_in = TREE_TYPE (sad_oprnd0);
658 *type_out = sum_type;
659
660 /* Pattern detected. Create a stmt to be used to replace the pattern: */
661 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
355fe088
TS
662 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
663 sad_oprnd1, plus_oprnd1);
79d652a5
CH
664
665 if (dump_enabled_p ())
666 {
667 dump_printf_loc (MSG_NOTE, vect_location,
668 "vect_recog_sad_pattern: detected: ");
669 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
670 dump_printf (MSG_NOTE, "\n");
671 }
672
79d652a5
CH
673 return pattern_stmt;
674}
675
676
36ba4aae
IR
677/* Handle widening operation by a constant. At the moment we support MULT_EXPR
678 and LSHIFT_EXPR.
679
680 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
681 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
51312233
IR
682
683 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
36ba4aae
IR
684 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
685 that satisfies the above restrictions, we can perform a widening opeartion
686 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
566d377a 687 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */
51312233
IR
688
689static bool
355fe088 690vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
36ba4aae 691 tree const_oprnd, tree *oprnd,
355fe088
TS
692 gimple **wstmt, tree type,
693 tree *half_type, gimple *def_stmt)
51312233 694{
83d5977e 695 tree new_type, new_oprnd;
51312233 696
36ba4aae
IR
697 if (code != MULT_EXPR && code != LSHIFT_EXPR)
698 return false;
699
700 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
701 || (code == LSHIFT_EXPR
702 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
703 != 1))
704 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
51312233
IR
705 {
706 /* CONST_OPRND is a constant of HALF_TYPE. */
707 *oprnd = gimple_assign_rhs1 (def_stmt);
708 return true;
709 }
710
f71cf56a
UW
711 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
712 return false;
713
714 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
51312233
IR
715 return false;
716
36ba4aae 717 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
51312233
IR
718 a type 2 times bigger than HALF_TYPE. */
719 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
720 TYPE_UNSIGNED (type));
36ba4aae
IR
721 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
722 || (code == LSHIFT_EXPR
723 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
51312233
IR
724 return false;
725
566d377a
RB
726 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
727 *oprnd = gimple_assign_rhs1 (def_stmt);
728 new_oprnd = make_ssa_name (new_type);
729 *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
730 *oprnd = new_oprnd;
51312233
IR
731
732 *half_type = new_type;
733 return true;
734}
735
736
20f06221
DN
737/* Function vect_recog_widen_mult_pattern
738
739 Try to find the following pattern:
740
d367387c
CH
741 type1 a_t;
742 type2 b_t;
20f06221
DN
743 TYPE a_T, b_T, prod_T;
744
745 S1 a_t = ;
746 S2 b_t = ;
747 S3 a_T = (TYPE) a_t;
748 S4 b_T = (TYPE) b_t;
749 S5 prod_T = a_T * b_T;
750
d367387c 751 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
20f06221 752
d47657bd 753 Also detect unsigned cases:
383d9c83 754
d367387c
CH
755 unsigned type1 a_t;
756 unsigned type2 b_t;
383d9c83
IR
757 unsigned TYPE u_prod_T;
758 TYPE a_T, b_T, prod_T;
759
760 S1 a_t = ;
761 S2 b_t = ;
762 S3 a_T = (TYPE) a_t;
763 S4 b_T = (TYPE) b_t;
764 S5 prod_T = a_T * b_T;
765 S6 u_prod_T = (unsigned TYPE) prod_T;
766
767 and multiplication by constants:
768
769 type a_t;
770 TYPE a_T, prod_T;
771
772 S1 a_t = ;
773 S3 a_T = (TYPE) a_t;
774 S5 prod_T = a_T * CONST;
775
51312233
IR
776 A special case of multiplication by constants is when 'TYPE' is 4 times
777 bigger than 'type', but CONST fits an intermediate type 2 times smaller
778 than 'TYPE'. In that case we create an additional pattern stmt for S3
779 to create a variable of the intermediate type, and perform widen-mult
780 on the intermediate type as well:
781
782 type a_t;
783 interm_type a_it;
784 TYPE a_T, prod_T, prod_T';
785
786 S1 a_t = ;
787 S3 a_T = (TYPE) a_t;
788 '--> a_it = (interm_type) a_t;
789 S5 prod_T = a_T * CONST;
790 '--> prod_T' = a_it w* CONST;
20f06221 791
51312233
IR
792 Input/Output:
793
794 * STMTS: Contains a stmt from which the pattern search begins. In the
795 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
796 is detected. In case of unsigned widen-mult, the original stmt (S5) is
797 replaced with S6 in STMTS. In case of multiplication by a constant
798 of an intermediate type (the last case above), STMTS also contains S3
799 (inserted before S5).
20f06221
DN
800
801 Output:
802
803 * TYPE_IN: The type of the input arguments to the pattern.
804
383d9c83 805 * TYPE_OUT: The type of the output of this pattern.
20f06221
DN
806
807 * Return value: A new stmt that will be used to replace the sequence of
383d9c83 808 stmts that constitute the pattern. In this case it will be:
20f06221 809 WIDEN_MULT <a_t, b_t>
d367387c
CH
810 If the result of WIDEN_MULT needs to be converted to a larger type, the
811 returned stmt will be this type conversion stmt.
20f06221
DN
812*/
813
355fe088
TS
814static gimple *
815vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
51312233 816 tree *type_in, tree *type_out)
20f06221 817{
355fe088
TS
818 gimple *last_stmt = stmts->pop ();
819 gimple *def_stmt0, *def_stmt1;
89d67cca
DN
820 tree oprnd0, oprnd1;
821 tree type, half_type0, half_type1;
355fe088 822 gimple *new_stmt = NULL, *pattern_stmt = NULL;
d367387c 823 tree vectype, vecitype;
726a989a 824 tree var;
89d67cca 825 enum tree_code dummy_code;
5d593372 826 int dummy_int;
9771b263 827 vec<tree> dummy_vec;
36ba4aae 828 bool op1_ok;
bc4fb355 829 bool promotion;
89d67cca 830
51312233 831 if (!is_gimple_assign (last_stmt))
89d67cca
DN
832 return NULL;
833
51312233 834 type = gimple_expr_type (last_stmt);
89d67cca
DN
835
836 /* Starting from LAST_STMT, follow the defs of its uses in search
837 of the above pattern. */
838
51312233 839 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
89d67cca
DN
840 return NULL;
841
51312233
IR
842 oprnd0 = gimple_assign_rhs1 (last_stmt);
843 oprnd1 = gimple_assign_rhs2 (last_stmt);
9600efe1
MM
844 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
845 || !types_compatible_p (TREE_TYPE (oprnd1), type))
89d67cca
DN
846 return NULL;
847
383d9c83 848 /* Check argument 0. */
bc4fb355
IR
849 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
850 &promotion)
851 || !promotion)
852 return NULL;
383d9c83 853 /* Check argument 1. */
bc4fb355
IR
854 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
855 &def_stmt1, &promotion);
89d67cca 856
bc4fb355 857 if (op1_ok && promotion)
383d9c83
IR
858 {
859 oprnd0 = gimple_assign_rhs1 (def_stmt0);
860 oprnd1 = gimple_assign_rhs1 (def_stmt1);
861 }
36ba4aae 862 else
383d9c83 863 {
51312233 864 if (TREE_CODE (oprnd1) == INTEGER_CST
383d9c83 865 && TREE_CODE (half_type0) == INTEGER_TYPE
36ba4aae 866 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
566d377a 867 &oprnd0, &new_stmt, type,
36ba4aae 868 &half_type0, def_stmt0))
bfdeda2c
JJ
869 {
870 half_type1 = half_type0;
871 oprnd1 = fold_convert (half_type1, oprnd1);
872 }
383d9c83
IR
873 else
874 return NULL;
875 }
876
d367387c
CH
877 /* If the two arguments have different sizes, convert the one with
878 the smaller type into the larger type. */
879 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
880 {
566d377a
RB
881 /* If we already used up the single-stmt slot give up. */
882 if (new_stmt)
883 return NULL;
884
d367387c 885 tree* oprnd = NULL;
355fe088 886 gimple *def_stmt = NULL;
d367387c
CH
887
888 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
889 {
890 def_stmt = def_stmt0;
891 half_type0 = half_type1;
892 oprnd = &oprnd0;
893 }
894 else
895 {
896 def_stmt = def_stmt1;
897 half_type1 = half_type0;
898 oprnd = &oprnd1;
899 }
900
2a2b8f64
JJ
901 tree old_oprnd = gimple_assign_rhs1 (def_stmt);
902 tree new_oprnd = make_ssa_name (half_type0);
903 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
904 *oprnd = new_oprnd;
d367387c
CH
905 }
906
383d9c83
IR
907 /* Handle unsigned case. Look for
908 S6 u_prod_T = (unsigned TYPE) prod_T;
909 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
910 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
911 {
355fe088 912 gimple *use_stmt;
9a7a4398 913 tree use_lhs;
383d9c83
IR
914 tree use_type;
915
916 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
917 return NULL;
918
9a7a4398
UW
919 use_stmt = vect_single_imm_use (last_stmt);
920 if (!use_stmt || !is_gimple_assign (use_stmt)
625a9766 921 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
383d9c83
IR
922 return NULL;
923
924 use_lhs = gimple_assign_lhs (use_stmt);
925 use_type = TREE_TYPE (use_lhs);
926 if (!INTEGRAL_TYPE_P (use_type)
927 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
928 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
929 return NULL;
930
931 type = use_type;
51312233 932 last_stmt = use_stmt;
383d9c83 933 }
89d67cca 934
9600efe1 935 if (!types_compatible_p (half_type0, half_type1))
89d67cca
DN
936 return NULL;
937
d367387c
CH
938 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
939 to get an intermediate result of type ITYPE. In this case we need
940 to build a statement to convert this intermediate result to type TYPE. */
941 tree itype = type;
942 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
943 itype = build_nonstandard_integer_type
944 (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
945 TYPE_UNSIGNED (type));
946
89d67cca 947 /* Pattern detected. */
73fbfcad 948 if (dump_enabled_p ())
ccb3ad87 949 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 950 "vect_recog_widen_mult_pattern: detected:\n");
89d67cca
DN
951
952 /* Check target support */
953 vectype = get_vectype_for_scalar_type (half_type0);
d367387c 954 vecitype = get_vectype_for_scalar_type (itype);
03d3e953 955 if (!vectype
d367387c 956 || !vecitype
51312233 957 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
d367387c 958 vecitype, vectype,
a86ec597
RH
959 &dummy_code, &dummy_code,
960 &dummy_int, &dummy_vec))
89d67cca
DN
961 return NULL;
962
963 *type_in = vectype;
d367387c 964 *type_out = get_vectype_for_scalar_type (type);
89d67cca
DN
965
966 /* Pattern supported. Create a stmt to be used to replace the pattern: */
d367387c 967 var = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 968 pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);
726a989a 969
d367387c 970 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
d367387c
CH
971 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
972
973 /* If the original two operands have different sizes, we may need to convert
974 the smaller one into the larget type. If this is the case, at this point
975 the new stmt is already built. */
976 if (new_stmt)
977 {
978 append_pattern_def_seq (stmt_vinfo, new_stmt);
979 stmt_vec_info new_stmt_info
310213d4 980 = new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
d367387c
CH
981 set_vinfo_for_stmt (new_stmt, new_stmt_info);
982 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
983 }
984
985 /* If ITYPE is not TYPE, we need to build a type convertion stmt to convert
986 the result of the widen-mult operation into type TYPE. */
987 if (itype != type)
988 {
989 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
990 stmt_vec_info pattern_stmt_info
310213d4 991 = new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
d367387c
CH
992 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
993 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
0d0e4a03
JJ
994 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
995 NOP_EXPR,
996 gimple_assign_lhs (pattern_stmt));
d367387c
CH
997 }
998
73fbfcad 999 if (dump_enabled_p ())
78c60e3d 1000 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
726a989a 1001
9771b263 1002 stmts->safe_push (last_stmt);
726a989a 1003 return pattern_stmt;
20f06221
DN
1004}
1005
1006
0b2229b0
RG
1007/* Function vect_recog_pow_pattern
1008
1009 Try to find the following pattern:
1010
1011 x = POW (y, N);
1012
1013 with POW being one of pow, powf, powi, powif and N being
1014 either 2 or 0.5.
1015
1016 Input:
1017
1018 * LAST_STMT: A stmt from which the pattern search begins.
1019
1020 Output:
1021
1022 * TYPE_IN: The type of the input arguments to the pattern.
1023
1024 * TYPE_OUT: The type of the output of this pattern.
1025
1026 * Return value: A new stmt that will be used to replace the sequence of
1027 stmts that constitute the pattern. In this case it will be:
726a989a 1028 x = x * x
0b2229b0 1029 or
726a989a 1030 x = sqrt (x)
0b2229b0
RG
1031*/
1032
355fe088
TS
1033static gimple *
1034vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
51312233 1035 tree *type_out)
0b2229b0 1036{
355fe088 1037 gimple *last_stmt = (*stmts)[0];
7a31e5ef 1038 tree base, exp = NULL;
355fe088 1039 gimple *stmt;
726a989a 1040 tree var;
0b2229b0 1041
51312233 1042 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
0b2229b0
RG
1043 return NULL;
1044
7a31e5ef 1045 switch (gimple_call_combined_fn (last_stmt))
0b2229b0 1046 {
7a31e5ef
RS
1047 CASE_CFN_POW:
1048 CASE_CFN_POWI:
51312233
IR
1049 base = gimple_call_arg (last_stmt, 0);
1050 exp = gimple_call_arg (last_stmt, 1);
0b2229b0
RG
1051 if (TREE_CODE (exp) != REAL_CST
1052 && TREE_CODE (exp) != INTEGER_CST)
726a989a 1053 return NULL;
0b2229b0
RG
1054 break;
1055
726a989a
RB
1056 default:
1057 return NULL;
0b2229b0
RG
1058 }
1059
1060 /* We now have a pow or powi builtin function call with a constant
1061 exponent. */
1062
0b2229b0
RG
1063 *type_out = NULL_TREE;
1064
1065 /* Catch squaring. */
9541ffee 1066 if ((tree_fits_shwi_p (exp)
9439e9a1 1067 && tree_to_shwi (exp) == 2)
0b2229b0 1068 || (TREE_CODE (exp) == REAL_CST
624d31fe 1069 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
c6b1b49b
RG
1070 {
1071 *type_in = TREE_TYPE (base);
726a989a
RB
1072
1073 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
0d0e4a03 1074 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
726a989a 1075 return stmt;
c6b1b49b 1076 }
0b2229b0
RG
1077
1078 /* Catch square root. */
1079 if (TREE_CODE (exp) == REAL_CST
624d31fe 1080 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
0b2229b0 1081 {
c6b1b49b 1082 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
d95ab70a
RS
1083 if (*type_in
1084 && direct_internal_fn_supported_p (IFN_SQRT, *type_in,
1085 OPTIMIZE_FOR_SPEED))
c6b1b49b 1086 {
b4e5bc47
RS
1087 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1088 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1089 gimple_call_set_lhs (stmt, var);
1090 return stmt;
c6b1b49b 1091 }
0b2229b0
RG
1092 }
1093
726a989a 1094 return NULL;
0b2229b0
RG
1095}
1096
1097
20f06221
DN
1098/* Function vect_recog_widen_sum_pattern
1099
1100 Try to find the following pattern:
1101
b8698a0f 1102 type x_t;
20f06221
DN
1103 TYPE x_T, sum = init;
1104 loop:
1105 sum_0 = phi <init, sum_1>
1106 S1 x_t = *p;
1107 S2 x_T = (TYPE) x_t;
1108 S3 sum_1 = x_T + sum_0;
1109
b8698a0f 1110 where type 'TYPE' is at least double the size of type 'type', i.e - we're
20f06221 1111 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
917f1b7e 1112 a special case of a reduction computation.
20f06221
DN
1113
1114 Input:
1115
1116 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1117 when this function is called with S3, the pattern {S2,S3} will be detected.
b8698a0f 1118
20f06221 1119 Output:
b8698a0f 1120
20f06221
DN
1121 * TYPE_IN: The type of the input arguments to the pattern.
1122
1123 * TYPE_OUT: The type of the output of this pattern.
1124
1125 * Return value: A new stmt that will be used to replace the sequence of
1126 stmts that constitute the pattern. In this case it will be:
1127 WIDEN_SUM <x_t, sum_0>
d29de1bf 1128
b8698a0f 1129 Note: The widening-sum idiom is a widening reduction pattern that is
d29de1bf 1130 vectorized without preserving all the intermediate results. It
b8698a0f
L
1131 produces only N/2 (widened) results (by summing up pairs of
1132 intermediate results) rather than all N results. Therefore, we
1133 cannot allow this pattern when we want to get all the results and in
1134 the correct order (as is the case when this computation is in an
d29de1bf 1135 inner-loop nested in an outer-loop that us being vectorized). */
20f06221 1136
355fe088
TS
1137static gimple *
1138vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
51312233 1139 tree *type_out)
20f06221 1140{
355fe088 1141 gimple *stmt, *last_stmt = (*stmts)[0];
20f06221 1142 tree oprnd0, oprnd1;
51312233 1143 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
20f06221 1144 tree type, half_type;
355fe088 1145 gimple *pattern_stmt;
d29de1bf 1146 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 1147 struct loop *loop;
726a989a 1148 tree var;
bc4fb355 1149 bool promotion;
20f06221 1150
f5709183
IR
1151 if (!loop_info)
1152 return NULL;
1153
1154 loop = LOOP_VINFO_LOOP (loop_info);
1155
328dc477
RB
1156 /* We don't allow changing the order of the computation in the inner-loop
1157 when doing outer-loop vectorization. */
1158 if (loop && nested_in_vect_loop_p (loop, last_stmt))
1159 return NULL;
1160
51312233 1161 if (!is_gimple_assign (last_stmt))
20f06221
DN
1162 return NULL;
1163
51312233 1164 type = gimple_expr_type (last_stmt);
20f06221
DN
1165
1166 /* Look for the following pattern
1167 DX = (TYPE) X;
1168 sum_1 = DX + sum_0;
1169 In which DX is at least double the size of X, and sum_1 has been
1170 recognized as a reduction variable.
1171 */
1172
1173 /* Starting from LAST_STMT, follow the defs of its uses in search
1174 of the above pattern. */
1175
51312233 1176 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
20f06221
DN
1177 return NULL;
1178
b308d872
RB
1179 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
1180 && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
1181 return NULL;
1182
51312233
IR
1183 oprnd0 = gimple_assign_rhs1 (last_stmt);
1184 oprnd1 = gimple_assign_rhs2 (last_stmt);
9600efe1
MM
1185 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
1186 || !types_compatible_p (TREE_TYPE (oprnd1), type))
20f06221
DN
1187 return NULL;
1188
51312233 1189 /* So far so good. Since last_stmt was detected as a (summation) reduction,
20f06221
DN
1190 we know that oprnd1 is the reduction variable (defined by a loop-header
1191 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1192 Left to check that oprnd0 is defined by a cast from type 'type' to type
1193 'TYPE'. */
1194
bc4fb355
IR
1195 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1196 &promotion)
1197 || !promotion)
1198 return NULL;
20f06221 1199
726a989a 1200 oprnd0 = gimple_assign_rhs1 (stmt);
20f06221
DN
1201 *type_in = half_type;
1202 *type_out = type;
1203
1204 /* Pattern detected. Create a stmt to be used to replace the pattern: */
726a989a 1205 var = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 1206 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);
726a989a 1207
73fbfcad 1208 if (dump_enabled_p ())
20f06221 1209 {
ccb3ad87 1210 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1211 "vect_recog_widen_sum_pattern: detected: ");
ccb3ad87 1212 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 1213 dump_printf (MSG_NOTE, "\n");
20f06221 1214 }
d29de1bf 1215
726a989a 1216 return pattern_stmt;
20f06221
DN
1217}
1218
1219
1107f3ae
IR
1220/* Return TRUE if the operation in STMT can be performed on a smaller type.
1221
1222 Input:
1223 STMT - a statement to check.
1224 DEF - we support operations with two operands, one of which is constant.
1225 The other operand can be defined by a demotion operation, or by a
1226 previous statement in a sequence of over-promoted operations. In the
1227 later case DEF is used to replace that operand. (It is defined by a
1228 pattern statement we created for the previous statement in the
1229 sequence).
1230
1231 Input/output:
1232 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1233 NULL, it's the type of DEF.
1234 STMTS - additional pattern statements. If a pattern statement (type
1235 conversion) is created in this function, its original statement is
1236 added to STMTS.
1237
1238 Output:
1239 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1240 operands to use in the new pattern statement for STMT (will be created
1241 in vect_recog_over_widening_pattern ()).
1242 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1243 statements for STMT: the first one is a type promotion and the second
1244 one is the operation itself. We return the type promotion statement
363477c0 1245 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1107f3ae
IR
1246 the second pattern statement. */
1247
1248static bool
355fe088
TS
1249vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
1250 tree *op0, tree *op1, gimple **new_def_stmt,
1251 vec<gimple *> *stmts)
1107f3ae
IR
1252{
1253 enum tree_code code;
1254 tree const_oprnd, oprnd;
83d5977e 1255 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
355fe088 1256 gimple *def_stmt, *new_stmt;
1107f3ae 1257 bool first = false;
bc4fb355 1258 bool promotion;
f5709183 1259
d6e1acf6
JJ
1260 *op0 = NULL_TREE;
1261 *op1 = NULL_TREE;
1107f3ae
IR
1262 *new_def_stmt = NULL;
1263
1264 if (!is_gimple_assign (stmt))
1265 return false;
1266
1267 code = gimple_assign_rhs_code (stmt);
1268 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1269 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1270 return false;
1271
1272 oprnd = gimple_assign_rhs1 (stmt);
1273 const_oprnd = gimple_assign_rhs2 (stmt);
1274 type = gimple_expr_type (stmt);
1275
1276 if (TREE_CODE (oprnd) != SSA_NAME
1277 || TREE_CODE (const_oprnd) != INTEGER_CST)
1278 return false;
1279
9ef7adc0
RG
1280 /* If oprnd has other uses besides that in stmt we cannot mark it
1281 as being part of a pattern only. */
1282 if (!has_single_use (oprnd))
1283 return false;
1284
1107f3ae
IR
1285 /* If we are in the middle of a sequence, we use DEF from a previous
1286 statement. Otherwise, OPRND has to be a result of type promotion. */
1287 if (*new_type)
1288 {
1289 half_type = *new_type;
1290 oprnd = def;
1291 }
1292 else
1293 {
1294 first = true;
bc4fb355 1295 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
f71cf56a
UW
1296 &promotion)
1297 || !promotion
1298 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1107f3ae
IR
1299 return false;
1300 }
1301
1302 /* Can we perform the operation on a smaller type? */
1303 switch (code)
1304 {
1305 case BIT_IOR_EXPR:
1306 case BIT_XOR_EXPR:
1307 case BIT_AND_EXPR:
1308 if (!int_fits_type_p (const_oprnd, half_type))
1309 {
1310 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1311 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1312 return false;
1313
1314 interm_type = build_nonstandard_integer_type (
1315 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1316 if (!int_fits_type_p (const_oprnd, interm_type))
1317 return false;
1318 }
1319
1320 break;
1321
1322 case LSHIFT_EXPR:
1323 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1324 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1325 return false;
1326
1327 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1328 (e.g., if the original value was char, the shift amount is at most 8
1329 if we want to use short). */
1330 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1331 return false;
1332
1333 interm_type = build_nonstandard_integer_type (
1334 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1335
1336 if (!vect_supportable_shift (code, interm_type))
1337 return false;
1338
1339 break;
1340
1341 case RSHIFT_EXPR:
1342 if (vect_supportable_shift (code, half_type))
1343 break;
1344
1345 /* Try intermediate type - HALF_TYPE is not supported. */
1346 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1347 return false;
1348
1349 interm_type = build_nonstandard_integer_type (
1350 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1351
1352 if (!vect_supportable_shift (code, interm_type))
1353 return false;
1354
1355 break;
1356
1357 default:
1358 gcc_unreachable ();
1359 }
1360
1361 /* There are four possible cases:
1362 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1363 the first statement in the sequence)
1364 a. The original, HALF_TYPE, is not enough - we replace the promotion
1365 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1366 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1367 promotion.
1368 2. OPRND is defined by a pattern statement we created.
1369 a. Its type is not sufficient for the operation, we create a new stmt:
1370 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1371 this statement in NEW_DEF_STMT, and it is later put in
363477c0 1372 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1107f3ae
IR
1373 b. OPRND is good to use in the new statement. */
1374 if (first)
1375 {
1376 if (interm_type)
1377 {
1378 /* Replace the original type conversion HALF_TYPE->TYPE with
1379 HALF_TYPE->INTERM_TYPE. */
1380 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1381 {
1382 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1383 /* Check if the already created pattern stmt is what we need. */
1384 if (!is_gimple_assign (new_stmt)
625a9766 1385 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
1107f3ae
IR
1386 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1387 return false;
1388
9771b263 1389 stmts->safe_push (def_stmt);
1107f3ae
IR
1390 oprnd = gimple_assign_lhs (new_stmt);
1391 }
1392 else
1393 {
1394 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1395 oprnd = gimple_assign_rhs1 (def_stmt);
b731b390 1396 new_oprnd = make_ssa_name (interm_type);
0d0e4a03 1397 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1107f3ae 1398 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
9771b263 1399 stmts->safe_push (def_stmt);
1107f3ae
IR
1400 oprnd = new_oprnd;
1401 }
1402 }
1403 else
1404 {
1405 /* Retrieve the operand before the type promotion. */
1406 oprnd = gimple_assign_rhs1 (def_stmt);
1407 }
1408 }
1409 else
1410 {
1411 if (interm_type)
1412 {
1413 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
b731b390 1414 new_oprnd = make_ssa_name (interm_type);
0d0e4a03 1415 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1107f3ae
IR
1416 oprnd = new_oprnd;
1417 *new_def_stmt = new_stmt;
1418 }
1419
1420 /* Otherwise, OPRND is already set. */
1421 }
1422
1423 if (interm_type)
1424 *new_type = interm_type;
1425 else
1426 *new_type = half_type;
1427
1428 *op0 = oprnd;
1429 *op1 = fold_convert (*new_type, const_oprnd);
1430
1431 return true;
1432}
1433
1434
1435/* Try to find a statement or a sequence of statements that can be performed
1436 on a smaller type:
1437
1438 type x_t;
1439 TYPE x_T, res0_T, res1_T;
1440 loop:
1441 S1 x_t = *p;
1442 S2 x_T = (TYPE) x_t;
1443 S3 res0_T = op (x_T, C0);
1444 S4 res1_T = op (res0_T, C1);
1445 S5 ... = () res1_T; - type demotion
1446
1447 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1448 constants.
1449 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1450 be 'type' or some intermediate type. For now, we expect S5 to be a type
71c92d17 1451 demotion operation. We also check that S3 and S4 have only one use. */
1107f3ae 1452
355fe088
TS
1453static gimple *
1454vect_recog_over_widening_pattern (vec<gimple *> *stmts,
1107f3ae
IR
1455 tree *type_in, tree *type_out)
1456{
355fe088
TS
1457 gimple *stmt = stmts->pop ();
1458 gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
1459 *use_stmt = NULL;
9a7a4398 1460 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
83d5977e 1461 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1107f3ae 1462 bool first;
b2a1a74d 1463 tree type = NULL;
1107f3ae
IR
1464
1465 first = true;
1466 while (1)
1467 {
1468 if (!vinfo_for_stmt (stmt)
1469 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1470 return NULL;
1471
1472 new_def_stmt = NULL;
1473 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1474 &op0, &op1, &new_def_stmt,
1475 stmts))
1476 {
1477 if (first)
1478 return NULL;
1479 else
1480 break;
1481 }
1482
1483 /* STMT can be performed on a smaller type. Check its uses. */
9a7a4398
UW
1484 use_stmt = vect_single_imm_use (stmt);
1485 if (!use_stmt || !is_gimple_assign (use_stmt))
1107f3ae
IR
1486 return NULL;
1487
1488 /* Create pattern statement for STMT. */
1489 vectype = get_vectype_for_scalar_type (new_type);
1490 if (!vectype)
1491 return NULL;
1492
1493 /* We want to collect all the statements for which we create pattern
1494 statetments, except for the case when the last statement in the
1495 sequence doesn't have a corresponding pattern statement. In such
1496 case we associate the last pattern statement with the last statement
36ba4aae 1497 in the sequence. Therefore, we only add the original statement to
1107f3ae
IR
1498 the list if we know that it is not the last. */
1499 if (prev_stmt)
9771b263 1500 stmts->safe_push (prev_stmt);
1107f3ae
IR
1501
1502 var = vect_recog_temp_ssa_var (new_type, NULL);
62371b92 1503 pattern_stmt
0d0e4a03 1504 = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1107f3ae 1505 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
083481d8 1506 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1107f3ae 1507
73fbfcad 1508 if (dump_enabled_p ())
1107f3ae 1509 {
ccb3ad87 1510 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1511 "created pattern stmt: ");
ccb3ad87 1512 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 1513 dump_printf (MSG_NOTE, "\n");
1107f3ae
IR
1514 }
1515
b2a1a74d 1516 type = gimple_expr_type (stmt);
1107f3ae
IR
1517 prev_stmt = stmt;
1518 stmt = use_stmt;
1519
1520 first = false;
1521 }
1522
1523 /* We got a sequence. We expect it to end with a type demotion operation.
1524 Otherwise, we quit (for now). There are three possible cases: the
1525 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1526 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1527 NEW_TYPE differs (we create a new conversion statement). */
1528 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1529 {
1530 use_lhs = gimple_assign_lhs (use_stmt);
1531 use_type = TREE_TYPE (use_lhs);
82db3d43 1532 /* Support only type demotion or signedess change. */
1107f3ae 1533 if (!INTEGRAL_TYPE_P (use_type)
82db3d43 1534 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1107f3ae
IR
1535 return NULL;
1536
82db3d43
IR
1537 /* Check that NEW_TYPE is not bigger than the conversion result. */
1538 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1539 return NULL;
1540
1107f3ae
IR
1541 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1542 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1543 {
1544 /* Create NEW_TYPE->USE_TYPE conversion. */
b731b390 1545 new_oprnd = make_ssa_name (use_type);
0d0e4a03 1546 pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1107f3ae
IR
1547 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1548
1549 *type_in = get_vectype_for_scalar_type (new_type);
1550 *type_out = get_vectype_for_scalar_type (use_type);
1551
1552 /* We created a pattern statement for the last statement in the
1553 sequence, so we don't need to associate it with the pattern
1554 statement created for PREV_STMT. Therefore, we add PREV_STMT
1555 to the list in order to mark it later in vect_pattern_recog_1. */
1556 if (prev_stmt)
9771b263 1557 stmts->safe_push (prev_stmt);
1107f3ae
IR
1558 }
1559 else
1560 {
1561 if (prev_stmt)
363477c0
JJ
1562 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1563 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1107f3ae
IR
1564
1565 *type_in = vectype;
1566 *type_out = NULL_TREE;
1567 }
1568
9771b263 1569 stmts->safe_push (use_stmt);
1107f3ae
IR
1570 }
1571 else
1572 /* TODO: support general case, create a conversion to the correct type. */
1573 return NULL;
1574
1575 /* Pattern detected. */
73fbfcad 1576 if (dump_enabled_p ())
1107f3ae 1577 {
ccb3ad87 1578 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1579 "vect_recog_over_widening_pattern: detected: ");
ccb3ad87 1580 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 1581 dump_printf (MSG_NOTE, "\n");
1107f3ae
IR
1582 }
1583
1584 return pattern_stmt;
1585}
1586
36ba4aae
IR
1587/* Detect widening shift pattern:
1588
1589 type a_t;
1590 TYPE a_T, res_T;
1591
1592 S1 a_t = ;
1593 S2 a_T = (TYPE) a_t;
1594 S3 res_T = a_T << CONST;
1595
1596 where type 'TYPE' is at least double the size of type 'type'.
1597
33018845
UW
1598 Also detect cases where the shift result is immediately converted
1599 to another type 'result_type' that is no larger in size than 'TYPE'.
1600 In those cases we perform a widen-shift that directly results in
1601 'result_type', to avoid a possible over-widening situation:
36ba4aae 1602
33018845 1603 type a_t;
36ba4aae 1604 TYPE a_T, res_T;
33018845 1605 result_type res_result;
36ba4aae
IR
1606
1607 S1 a_t = ;
1608 S2 a_T = (TYPE) a_t;
1609 S3 res_T = a_T << CONST;
33018845
UW
1610 S4 res_result = (result_type) res_T;
1611 '--> res_result' = a_t w<< CONST;
36ba4aae
IR
1612
1613 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1614 create an additional pattern stmt for S2 to create a variable of an
1615 intermediate type, and perform widen-shift on the intermediate type:
1616
1617 type a_t;
1618 interm_type a_it;
1619 TYPE a_T, res_T, res_T';
1620
1621 S1 a_t = ;
1622 S2 a_T = (TYPE) a_t;
1623 '--> a_it = (interm_type) a_t;
1624 S3 res_T = a_T << CONST;
1625 '--> res_T' = a_it <<* CONST;
1626
1627 Input/Output:
1628
1629 * STMTS: Contains a stmt from which the pattern search begins.
1630 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1631 in STMTS. When an intermediate type is used and a pattern statement is
1632 created for S2, we also put S2 here (before S3).
1633
1634 Output:
1635
1636 * TYPE_IN: The type of the input arguments to the pattern.
1637
1638 * TYPE_OUT: The type of the output of this pattern.
1639
1640 * Return value: A new stmt that will be used to replace the sequence of
1641 stmts that constitute the pattern. In this case it will be:
1642 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1643
355fe088
TS
1644static gimple *
1645vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
36ba4aae
IR
1646 tree *type_in, tree *type_out)
1647{
355fe088
TS
1648 gimple *last_stmt = stmts->pop ();
1649 gimple *def_stmt0;
36ba4aae
IR
1650 tree oprnd0, oprnd1;
1651 tree type, half_type0;
355fe088 1652 gimple *pattern_stmt;
36ba4aae 1653 tree vectype, vectype_out = NULL_TREE;
36ba4aae
IR
1654 tree var;
1655 enum tree_code dummy_code;
1656 int dummy_int;
9771b263 1657 vec<tree> dummy_vec;
355fe088 1658 gimple *use_stmt;
bc4fb355 1659 bool promotion;
36ba4aae
IR
1660
1661 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1662 return NULL;
1663
36ba4aae 1664 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
33018845 1665 return NULL;
36ba4aae
IR
1666
1667 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1668 return NULL;
1669
1670 oprnd0 = gimple_assign_rhs1 (last_stmt);
1671 oprnd1 = gimple_assign_rhs2 (last_stmt);
1672 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1673 return NULL;
1674
1675 /* Check operand 0: it has to be defined by a type promotion. */
bc4fb355 1676 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
566d377a 1677 &promotion)
bc4fb355
IR
1678 || !promotion)
1679 return NULL;
36ba4aae
IR
1680
1681 /* Check operand 1: has to be positive. We check that it fits the type
1682 in vect_handle_widen_op_by_const (). */
1683 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1684 return NULL;
1685
1686 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1687 type = gimple_expr_type (last_stmt);
1688
33018845
UW
1689 /* Check for subsequent conversion to another type. */
1690 use_stmt = vect_single_imm_use (last_stmt);
1691 if (use_stmt && is_gimple_assign (use_stmt)
1692 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1693 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1694 {
1695 tree use_lhs = gimple_assign_lhs (use_stmt);
1696 tree use_type = TREE_TYPE (use_lhs);
1697
1698 if (INTEGRAL_TYPE_P (use_type)
1699 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1700 {
1701 last_stmt = use_stmt;
1702 type = use_type;
1703 }
1704 }
1705
36ba4aae 1706 /* Check if this a widening operation. */
355fe088 1707 gimple *wstmt = NULL;
36ba4aae 1708 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
566d377a 1709 &oprnd0, &wstmt,
36ba4aae
IR
1710 type, &half_type0, def_stmt0))
1711 return NULL;
1712
36ba4aae 1713 /* Pattern detected. */
73fbfcad 1714 if (dump_enabled_p ())
ccb3ad87 1715 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1716 "vect_recog_widen_shift_pattern: detected:\n");
36ba4aae
IR
1717
1718 /* Check target support. */
1719 vectype = get_vectype_for_scalar_type (half_type0);
1720 vectype_out = get_vectype_for_scalar_type (type);
1721
1722 if (!vectype
1723 || !vectype_out
1724 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1725 vectype_out, vectype,
a86ec597
RH
1726 &dummy_code, &dummy_code,
1727 &dummy_int, &dummy_vec))
36ba4aae
IR
1728 return NULL;
1729
1730 *type_in = vectype;
1731 *type_out = vectype_out;
1732
1733 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1734 var = vect_recog_temp_ssa_var (type, NULL);
1735 pattern_stmt =
0d0e4a03 1736 gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
566d377a
RB
1737 if (wstmt)
1738 {
1739 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
566d377a
RB
1740 new_pattern_def_seq (stmt_vinfo, wstmt);
1741 stmt_vec_info new_stmt_info
310213d4 1742 = new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
566d377a
RB
1743 set_vinfo_for_stmt (wstmt, new_stmt_info);
1744 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1745 }
36ba4aae 1746
73fbfcad 1747 if (dump_enabled_p ())
78c60e3d 1748 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
7e9a3abb
JJ
1749
1750 stmts->safe_push (last_stmt);
1751 return pattern_stmt;
1752}
1753
1754/* Detect a rotate pattern wouldn't be otherwise vectorized:
1755
1756 type a_t, b_t, c_t;
1757
1758 S0 a_t = b_t r<< c_t;
1759
1760 Input/Output:
1761
1762 * STMTS: Contains a stmt from which the pattern search begins,
1763 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1764 with a sequence:
1765
1766 S1 d_t = -c_t;
1767 S2 e_t = d_t & (B - 1);
1768 S3 f_t = b_t << c_t;
1769 S4 g_t = b_t >> e_t;
1770 S0 a_t = f_t | g_t;
1771
1772 where B is element bitsize of type.
1773
1774 Output:
1775
1776 * TYPE_IN: The type of the input arguments to the pattern.
1777
1778 * TYPE_OUT: The type of the output of this pattern.
1779
1780 * Return value: A new stmt that will be used to replace the rotate
1781 S0 stmt. */
1782
355fe088
TS
1783static gimple *
1784vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
7e9a3abb 1785{
355fe088 1786 gimple *last_stmt = stmts->pop ();
7e9a3abb 1787 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
355fe088 1788 gimple *pattern_stmt, *def_stmt;
7e9a3abb
JJ
1789 enum tree_code rhs_code;
1790 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
310213d4 1791 vec_info *vinfo = stmt_vinfo->vinfo;
7e9a3abb
JJ
1792 enum vect_def_type dt;
1793 optab optab1, optab2;
68119618 1794 edge ext_def = NULL;
7e9a3abb
JJ
1795
1796 if (!is_gimple_assign (last_stmt))
1797 return NULL;
1798
1799 rhs_code = gimple_assign_rhs_code (last_stmt);
1800 switch (rhs_code)
1801 {
1802 case LROTATE_EXPR:
1803 case RROTATE_EXPR:
1804 break;
1805 default:
1806 return NULL;
1807 }
1808
1809 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1810 return NULL;
1811
1812 lhs = gimple_assign_lhs (last_stmt);
1813 oprnd0 = gimple_assign_rhs1 (last_stmt);
1814 type = TREE_TYPE (oprnd0);
1815 oprnd1 = gimple_assign_rhs2 (last_stmt);
1816 if (TREE_CODE (oprnd0) != SSA_NAME
1817 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1818 || !INTEGRAL_TYPE_P (type)
1819 || !TYPE_UNSIGNED (type))
1820 return NULL;
1821
81c40241 1822 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
7e9a3abb
JJ
1823 return NULL;
1824
1825 if (dt != vect_internal_def
1826 && dt != vect_constant_def
1827 && dt != vect_external_def)
1828 return NULL;
1829
1830 vectype = get_vectype_for_scalar_type (type);
1831 if (vectype == NULL_TREE)
1832 return NULL;
1833
1834 /* If vector/vector or vector/scalar rotate is supported by the target,
1835 don't do anything here. */
1836 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1837 if (optab1
1838 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1839 return NULL;
1840
310213d4 1841 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
7e9a3abb
JJ
1842 {
1843 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1844 if (optab2
1845 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1846 return NULL;
1847 }
1848
1849 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1850 don't do anything here either. */
1851 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1852 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1853 if (!optab1
1854 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1855 || !optab2
1856 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1857 {
310213d4 1858 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
7e9a3abb
JJ
1859 return NULL;
1860 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1861 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1862 if (!optab1
1863 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1864 || !optab2
1865 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1866 return NULL;
1867 }
1868
1869 *type_in = vectype;
1870 *type_out = vectype;
1871 if (*type_in == NULL_TREE)
1872 return NULL;
1873
68119618
JJ
1874 if (dt == vect_external_def
1875 && TREE_CODE (oprnd1) == SSA_NAME
310213d4 1876 && is_a <loop_vec_info> (vinfo))
68119618 1877 {
310213d4 1878 struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
68119618
JJ
1879 ext_def = loop_preheader_edge (loop);
1880 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1881 {
1882 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1883 if (bb == NULL
1884 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1885 ext_def = NULL;
1886 }
1887 }
1888
7e9a3abb
JJ
1889 def = NULL_TREE;
1890 if (TREE_CODE (oprnd1) == INTEGER_CST
1891 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1892 def = oprnd1;
1893 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1894 {
1895 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1896 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1897 && TYPE_PRECISION (TREE_TYPE (rhs1))
1898 == TYPE_PRECISION (type))
1899 def = rhs1;
1900 }
1901
1902 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1903 if (def == NULL_TREE)
1904 {
1905 def = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 1906 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
68119618
JJ
1907 if (ext_def)
1908 {
1909 basic_block new_bb
1910 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1911 gcc_assert (!new_bb);
1912 }
1913 else
1914 append_pattern_def_seq (stmt_vinfo, def_stmt);
7e9a3abb
JJ
1915 }
1916 stype = TREE_TYPE (def);
1917
1918 if (TREE_CODE (def) == INTEGER_CST)
1919 {
cc269bb6 1920 if (!tree_fits_uhwi_p (def)
7d362f6c 1921 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
7e9a3abb
JJ
1922 || integer_zerop (def))
1923 return NULL;
1924 def2 = build_int_cst (stype,
1925 GET_MODE_PRECISION (TYPE_MODE (type))
ae7e9ddd 1926 - tree_to_uhwi (def));
7e9a3abb
JJ
1927 }
1928 else
1929 {
1930 tree vecstype = get_vectype_for_scalar_type (stype);
1931 stmt_vec_info def_stmt_vinfo;
1932
1933 if (vecstype == NULL_TREE)
1934 return NULL;
1935 def2 = vect_recog_temp_ssa_var (stype, NULL);
0d0e4a03 1936 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
68119618
JJ
1937 if (ext_def)
1938 {
1939 basic_block new_bb
1940 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1941 gcc_assert (!new_bb);
1942 }
1943 else
1944 {
310213d4 1945 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
68119618
JJ
1946 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1947 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1948 append_pattern_def_seq (stmt_vinfo, def_stmt);
1949 }
7e9a3abb
JJ
1950
1951 def2 = vect_recog_temp_ssa_var (stype, NULL);
1952 tree mask
1953 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
0d0e4a03
JJ
1954 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
1955 gimple_assign_lhs (def_stmt), mask);
68119618
JJ
1956 if (ext_def)
1957 {
1958 basic_block new_bb
1959 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1960 gcc_assert (!new_bb);
1961 }
1962 else
1963 {
310213d4 1964 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
68119618
JJ
1965 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1966 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1967 append_pattern_def_seq (stmt_vinfo, def_stmt);
1968 }
7e9a3abb
JJ
1969 }
1970
1971 var1 = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
1972 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
1973 ? LSHIFT_EXPR : RSHIFT_EXPR,
1974 oprnd0, def);
7e9a3abb
JJ
1975 append_pattern_def_seq (stmt_vinfo, def_stmt);
1976
1977 var2 = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
1978 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
1979 ? RSHIFT_EXPR : LSHIFT_EXPR,
1980 oprnd0, def2);
7e9a3abb
JJ
1981 append_pattern_def_seq (stmt_vinfo, def_stmt);
1982
1983 /* Pattern detected. */
1984 if (dump_enabled_p ())
1985 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1986 "vect_recog_rotate_pattern: detected:\n");
7e9a3abb
JJ
1987
1988 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1989 var = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 1990 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
7e9a3abb
JJ
1991
1992 if (dump_enabled_p ())
1993 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
36ba4aae 1994
9771b263 1995 stmts->safe_push (last_stmt);
36ba4aae
IR
1996 return pattern_stmt;
1997}
1107f3ae 1998
732a0ad3
JJ
1999/* Detect a vector by vector shift pattern that wouldn't be otherwise
2000 vectorized:
2001
2002 type a_t;
2003 TYPE b_T, res_T;
2004
2005 S1 a_t = ;
2006 S2 b_T = ;
2007 S3 res_T = b_T op a_t;
2008
2009 where type 'TYPE' is a type with different size than 'type',
2010 and op is <<, >> or rotate.
2011
2012 Also detect cases:
2013
2014 type a_t;
2015 TYPE b_T, c_T, res_T;
2016
2017 S0 c_T = ;
2018 S1 a_t = (type) c_T;
2019 S2 b_T = ;
2020 S3 res_T = b_T op a_t;
2021
2022 Input/Output:
2023
2024 * STMTS: Contains a stmt from which the pattern search begins,
2025 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2026 with a shift/rotate which has same type on both operands, in the
2027 second case just b_T op c_T, in the first case with added cast
363477c0 2028 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
732a0ad3
JJ
2029
2030 Output:
2031
2032 * TYPE_IN: The type of the input arguments to the pattern.
2033
2034 * TYPE_OUT: The type of the output of this pattern.
2035
2036 * Return value: A new stmt that will be used to replace the shift/rotate
2037 S3 stmt. */
2038
355fe088
TS
2039static gimple *
2040vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
732a0ad3
JJ
2041 tree *type_in, tree *type_out)
2042{
355fe088 2043 gimple *last_stmt = stmts->pop ();
732a0ad3 2044 tree oprnd0, oprnd1, lhs, var;
355fe088 2045 gimple *pattern_stmt, *def_stmt;
732a0ad3
JJ
2046 enum tree_code rhs_code;
2047 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
310213d4 2048 vec_info *vinfo = stmt_vinfo->vinfo;
732a0ad3 2049 enum vect_def_type dt;
732a0ad3
JJ
2050
2051 if (!is_gimple_assign (last_stmt))
2052 return NULL;
2053
2054 rhs_code = gimple_assign_rhs_code (last_stmt);
2055 switch (rhs_code)
2056 {
2057 case LSHIFT_EXPR:
2058 case RSHIFT_EXPR:
2059 case LROTATE_EXPR:
2060 case RROTATE_EXPR:
2061 break;
2062 default:
2063 return NULL;
2064 }
2065
2066 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2067 return NULL;
2068
2069 lhs = gimple_assign_lhs (last_stmt);
2070 oprnd0 = gimple_assign_rhs1 (last_stmt);
2071 oprnd1 = gimple_assign_rhs2 (last_stmt);
2072 if (TREE_CODE (oprnd0) != SSA_NAME
2073 || TREE_CODE (oprnd1) != SSA_NAME
2074 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2075 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2076 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2077 || TYPE_PRECISION (TREE_TYPE (lhs))
2078 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2079 return NULL;
2080
81c40241 2081 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
732a0ad3
JJ
2082 return NULL;
2083
2084 if (dt != vect_internal_def)
2085 return NULL;
2086
2087 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2088 *type_out = *type_in;
2089 if (*type_in == NULL_TREE)
2090 return NULL;
2091
81c40241 2092 tree def = NULL_TREE;
97ecdb46
JJ
2093 stmt_vec_info def_vinfo = vinfo_for_stmt (def_stmt);
2094 if (!STMT_VINFO_IN_PATTERN_P (def_vinfo) && gimple_assign_cast_p (def_stmt))
732a0ad3
JJ
2095 {
2096 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2097 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2098 && TYPE_PRECISION (TREE_TYPE (rhs1))
2099 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
0179520a
JJ
2100 {
2101 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2102 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2103 def = rhs1;
2104 else
2105 {
2106 tree mask
2107 = build_low_bits_mask (TREE_TYPE (rhs1),
2108 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2109 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2110 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2111 new_pattern_def_seq (stmt_vinfo, def_stmt);
2112 }
2113 }
732a0ad3
JJ
2114 }
2115
2116 if (def == NULL_TREE)
2117 {
2118 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
0d0e4a03 2119 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
083481d8 2120 new_pattern_def_seq (stmt_vinfo, def_stmt);
732a0ad3
JJ
2121 }
2122
2123 /* Pattern detected. */
73fbfcad 2124 if (dump_enabled_p ())
ccb3ad87 2125 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2126 "vect_recog_vector_vector_shift_pattern: detected:\n");
732a0ad3
JJ
2127
2128 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2129 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
0d0e4a03 2130 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
732a0ad3 2131
73fbfcad 2132 if (dump_enabled_p ())
78c60e3d 2133 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
732a0ad3 2134
9771b263 2135 stmts->safe_push (last_stmt);
732a0ad3
JJ
2136 return pattern_stmt;
2137}
2138
47486460
VK
2139/* Detect multiplication by constant which are postive or negatives of power 2,
2140 and convert them to shift patterns.
2141
2142 Mult with constants that are postive power of two.
2143 type a_t;
2144 type b_t
2145 S1: b_t = a_t * n
2146
2147 or
2148
2149 Mult with constants that are negative power of two.
2150 S2: b_t = a_t * -n
2151
2152 Input/Output:
2153
2154 STMTS: Contains a stmt from which the pattern search begins,
2155 i.e. the mult stmt. Convert the mult operation to LSHIFT if
2156 constant operand is a power of 2.
2157 type a_t, b_t
2158 S1': b_t = a_t << log2 (n)
2159
2160 Convert the mult operation to LSHIFT and followed by a NEGATE
2161 if constant operand is a negative power of 2.
2162 type a_t, b_t, res_T;
2163 S2': b_t = a_t << log2 (n)
2164 S3': res_T = - (b_t)
2165
2166 Output:
2167
2168 * TYPE_IN: The type of the input arguments to the pattern.
2169
2170 * TYPE_OUT: The type of the output of this pattern.
2171
2172 * Return value: A new stmt that will be used to replace the multiplication
2173 S1 or S2 stmt. */
2174
355fe088
TS
2175static gimple *
2176vect_recog_mult_pattern (vec<gimple *> *stmts,
47486460
VK
2177 tree *type_in, tree *type_out)
2178{
355fe088 2179 gimple *last_stmt = stmts->pop ();
47486460 2180 tree oprnd0, oprnd1, vectype, itype;
355fe088 2181 gimple *pattern_stmt, *def_stmt;
47486460
VK
2182 optab optab;
2183 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2184 int power2_val, power2_neg_val;
2185 tree shift;
2186
2187 if (!is_gimple_assign (last_stmt))
2188 return NULL;
2189
2190 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2191 return NULL;
2192
2193 oprnd0 = gimple_assign_rhs1 (last_stmt);
2194 oprnd1 = gimple_assign_rhs2 (last_stmt);
2195 itype = TREE_TYPE (oprnd0);
2196
2197 if (TREE_CODE (oprnd0) != SSA_NAME
2198 || TREE_CODE (oprnd1) != INTEGER_CST
2199 || !INTEGRAL_TYPE_P (itype)
2200 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2201 return NULL;
2202
2203 vectype = get_vectype_for_scalar_type (itype);
2204 if (vectype == NULL_TREE)
2205 return NULL;
2206
2207 /* If the target can handle vectorized multiplication natively,
2208 don't attempt to optimize this. */
2209 optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2210 if (optab != unknown_optab)
2211 {
2212 machine_mode vec_mode = TYPE_MODE (vectype);
2213 int icode = (int) optab_handler (optab, vec_mode);
2214 if (icode != CODE_FOR_nothing)
2215 return NULL;
2216 }
2217
2218 /* If target cannot handle vector left shift then we cannot
2219 optimize and bail out. */
2220 optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2221 if (!optab
2222 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2223 return NULL;
2224
2225 power2_val = wi::exact_log2 (oprnd1);
2226 power2_neg_val = wi::exact_log2 (wi::neg (oprnd1));
2227
2228 /* Handle constant operands that are postive or negative powers of 2. */
2229 if (power2_val != -1)
2230 {
2231 shift = build_int_cst (itype, power2_val);
2232 pattern_stmt
2233 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2234 LSHIFT_EXPR, oprnd0, shift);
2235 }
2236 else if (power2_neg_val != -1)
2237 {
2238 /* If the target cannot handle vector NEGATE then we cannot
2239 do the optimization. */
2240 optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector);
2241 if (!optab
2242 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2243 return NULL;
2244
2245 shift = build_int_cst (itype, power2_neg_val);
2246 def_stmt
2247 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2248 LSHIFT_EXPR, oprnd0, shift);
2249 new_pattern_def_seq (stmt_vinfo, def_stmt);
2250 pattern_stmt
2251 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2252 NEGATE_EXPR, gimple_assign_lhs (def_stmt));
2253 }
2254 else
2255 return NULL;
2256
2257 /* Pattern detected. */
2258 if (dump_enabled_p ())
2259 dump_printf_loc (MSG_NOTE, vect_location,
2260 "vect_recog_mult_pattern: detected:\n");
2261
2262 if (dump_enabled_p ())
2263 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2264 pattern_stmt,0);
2265
2266 stmts->safe_push (last_stmt);
2267 *type_in = vectype;
2268 *type_out = vectype;
2269
2270 return pattern_stmt;
2271}
2272
079c527f 2273/* Detect a signed division by a constant that wouldn't be
363477c0
JJ
2274 otherwise vectorized:
2275
2276 type a_t, b_t;
2277
2278 S1 a_t = b_t / N;
2279
079c527f 2280 where type 'type' is an integral type and N is a constant.
363477c0 2281
079c527f 2282 Similarly handle modulo by a constant:
363477c0
JJ
2283
2284 S4 a_t = b_t % N;
2285
2286 Input/Output:
2287
2288 * STMTS: Contains a stmt from which the pattern search begins,
079c527f
JJ
2289 i.e. the division stmt. S1 is replaced by if N is a power
2290 of two constant and type is signed:
363477c0
JJ
2291 S3 y_t = b_t < 0 ? N - 1 : 0;
2292 S2 x_t = b_t + y_t;
2293 S1' a_t = x_t >> log2 (N);
2294
079c527f
JJ
2295 S4 is replaced if N is a power of two constant and
2296 type is signed by (where *_T temporaries have unsigned type):
363477c0
JJ
2297 S9 y_T = b_t < 0 ? -1U : 0U;
2298 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2299 S7 z_t = (type) z_T;
2300 S6 w_t = b_t + z_t;
2301 S5 x_t = w_t & (N - 1);
2302 S4' a_t = x_t - z_t;
2303
2304 Output:
2305
2306 * TYPE_IN: The type of the input arguments to the pattern.
2307
2308 * TYPE_OUT: The type of the output of this pattern.
2309
2310 * Return value: A new stmt that will be used to replace the division
2311 S1 or modulo S4 stmt. */
2312
355fe088
TS
2313static gimple *
2314vect_recog_divmod_pattern (vec<gimple *> *stmts,
079c527f 2315 tree *type_in, tree *type_out)
363477c0 2316{
355fe088 2317 gimple *last_stmt = stmts->pop ();
5deb57cb 2318 tree oprnd0, oprnd1, vectype, itype, cond;
355fe088 2319 gimple *pattern_stmt, *def_stmt;
363477c0
JJ
2320 enum tree_code rhs_code;
2321 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
310213d4 2322 vec_info *vinfo = stmt_vinfo->vinfo;
363477c0 2323 optab optab;
00f07b86 2324 tree q;
079c527f 2325 int dummy_int, prec;
079c527f 2326 stmt_vec_info def_stmt_vinfo;
363477c0
JJ
2327
2328 if (!is_gimple_assign (last_stmt))
2329 return NULL;
2330
2331 rhs_code = gimple_assign_rhs_code (last_stmt);
2332 switch (rhs_code)
2333 {
2334 case TRUNC_DIV_EXPR:
2335 case TRUNC_MOD_EXPR:
2336 break;
2337 default:
2338 return NULL;
2339 }
2340
2341 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2342 return NULL;
2343
2344 oprnd0 = gimple_assign_rhs1 (last_stmt);
2345 oprnd1 = gimple_assign_rhs2 (last_stmt);
2346 itype = TREE_TYPE (oprnd0);
2347 if (TREE_CODE (oprnd0) != SSA_NAME
2348 || TREE_CODE (oprnd1) != INTEGER_CST
2349 || TREE_CODE (itype) != INTEGER_TYPE
079c527f 2350 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
363477c0
JJ
2351 return NULL;
2352
2353 vectype = get_vectype_for_scalar_type (itype);
2354 if (vectype == NULL_TREE)
2355 return NULL;
2356
2357 /* If the target can handle vectorized division or modulo natively,
2358 don't attempt to optimize this. */
2359 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2225b9f2 2360 if (optab != unknown_optab)
363477c0 2361 {
ef4bddc2 2362 machine_mode vec_mode = TYPE_MODE (vectype);
363477c0 2363 int icode = (int) optab_handler (optab, vec_mode);
e6d4f8f5 2364 if (icode != CODE_FOR_nothing)
363477c0
JJ
2365 return NULL;
2366 }
2367
079c527f
JJ
2368 prec = TYPE_PRECISION (itype);
2369 if (integer_pow2p (oprnd1))
363477c0 2370 {
079c527f
JJ
2371 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2372 return NULL;
363477c0 2373
079c527f 2374 /* Pattern detected. */
73fbfcad 2375 if (dump_enabled_p ())
ccb3ad87 2376 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2377 "vect_recog_divmod_pattern: detected:\n");
079c527f
JJ
2378
2379 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2380 build_int_cst (itype, 0));
2381 if (rhs_code == TRUNC_DIV_EXPR)
2382 {
2383 tree var = vect_recog_temp_ssa_var (itype, NULL);
2384 tree shift;
2385 def_stmt
0d0e4a03
JJ
2386 = gimple_build_assign (var, COND_EXPR, cond,
2387 fold_build2 (MINUS_EXPR, itype, oprnd1,
2388 build_int_cst (itype, 1)),
2389 build_int_cst (itype, 0));
079c527f
JJ
2390 new_pattern_def_seq (stmt_vinfo, def_stmt);
2391 var = vect_recog_temp_ssa_var (itype, NULL);
2392 def_stmt
0d0e4a03
JJ
2393 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2394 gimple_assign_lhs (def_stmt));
079c527f
JJ
2395 append_pattern_def_seq (stmt_vinfo, def_stmt);
2396
2397 shift = build_int_cst (itype, tree_log2 (oprnd1));
2398 pattern_stmt
0d0e4a03
JJ
2399 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2400 RSHIFT_EXPR, var, shift);
079c527f
JJ
2401 }
2402 else
2403 {
2404 tree signmask;
2405 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2406 if (compare_tree_int (oprnd1, 2) == 0)
2407 {
2408 signmask = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2409 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2410 build_int_cst (itype, 1),
2411 build_int_cst (itype, 0));
079c527f
JJ
2412 append_pattern_def_seq (stmt_vinfo, def_stmt);
2413 }
2414 else
2415 {
2416 tree utype
2417 = build_nonstandard_integer_type (prec, 1);
2418 tree vecutype = get_vectype_for_scalar_type (utype);
2419 tree shift
2420 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2421 - tree_log2 (oprnd1));
2422 tree var = vect_recog_temp_ssa_var (utype, NULL);
2423
0d0e4a03
JJ
2424 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2425 build_int_cst (utype, -1),
2426 build_int_cst (utype, 0));
310213d4 2427 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
079c527f
JJ
2428 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2429 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2430 append_pattern_def_seq (stmt_vinfo, def_stmt);
2431 var = vect_recog_temp_ssa_var (utype, NULL);
0d0e4a03
JJ
2432 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2433 gimple_assign_lhs (def_stmt),
2434 shift);
310213d4 2435 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
079c527f
JJ
2436 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2437 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2438 append_pattern_def_seq (stmt_vinfo, def_stmt);
2439 signmask = vect_recog_temp_ssa_var (itype, NULL);
2440 def_stmt
0d0e4a03 2441 = gimple_build_assign (signmask, NOP_EXPR, var);
079c527f
JJ
2442 append_pattern_def_seq (stmt_vinfo, def_stmt);
2443 }
2444 def_stmt
0d0e4a03
JJ
2445 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2446 PLUS_EXPR, oprnd0, signmask);
079c527f
JJ
2447 append_pattern_def_seq (stmt_vinfo, def_stmt);
2448 def_stmt
0d0e4a03
JJ
2449 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2450 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2451 fold_build2 (MINUS_EXPR, itype, oprnd1,
2452 build_int_cst (itype, 1)));
079c527f
JJ
2453 append_pattern_def_seq (stmt_vinfo, def_stmt);
2454
2455 pattern_stmt
0d0e4a03
JJ
2456 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2457 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2458 signmask);
079c527f
JJ
2459 }
2460
73fbfcad 2461 if (dump_enabled_p ())
78c60e3d
SS
2462 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2463 0);
079c527f 2464
9771b263 2465 stmts->safe_push (last_stmt);
079c527f
JJ
2466
2467 *type_in = vectype;
2468 *type_out = vectype;
2469 return pattern_stmt;
363477c0 2470 }
079c527f 2471
6b58915b
RS
2472 if (prec > HOST_BITS_PER_WIDE_INT
2473 || integer_zerop (oprnd1))
079c527f
JJ
2474 return NULL;
2475
00f07b86
RH
2476 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2477 return NULL;
079c527f
JJ
2478
2479 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2480
2481 if (TYPE_UNSIGNED (itype))
363477c0 2482 {
079c527f
JJ
2483 unsigned HOST_WIDE_INT mh, ml;
2484 int pre_shift, post_shift;
6b58915b
RS
2485 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2486 & GET_MODE_MASK (TYPE_MODE (itype)));
5deb57cb 2487 tree t1, t2, t3, t4;
079c527f
JJ
2488
2489 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2490 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2491 return NULL;
2492
2493 /* Find a suitable multiplier and right shift count
2494 instead of multiplying with D. */
2495 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2496
2497 /* If the suggested multiplier is more than SIZE bits, we can do better
2498 for even divisors, using an initial right shift. */
2499 if (mh != 0 && (d & 1) == 0)
363477c0 2500 {
079c527f
JJ
2501 pre_shift = floor_log2 (d & -d);
2502 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2503 &ml, &post_shift, &dummy_int);
2504 gcc_assert (!mh);
2505 }
2506 else
2507 pre_shift = 0;
2508
2509 if (mh != 0)
2510 {
2511 if (post_shift - 1 >= prec)
2512 return NULL;
2513
5deb57cb
JJ
2514 /* t1 = oprnd0 h* ml;
2515 t2 = oprnd0 - t1;
2516 t3 = t2 >> 1;
2517 t4 = t1 + t3;
2518 q = t4 >> (post_shift - 1); */
2519 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2520 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2521 build_int_cst (itype, ml));
079c527f 2522 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f 2523
5deb57cb 2524 t2 = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2525 def_stmt
0d0e4a03 2526 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
083481d8 2527 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f
JJ
2528
2529 t3 = vect_recog_temp_ssa_var (itype, NULL);
2530 def_stmt
0d0e4a03 2531 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
079c527f
JJ
2532 append_pattern_def_seq (stmt_vinfo, def_stmt);
2533
5deb57cb 2534 t4 = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2535 def_stmt
0d0e4a03 2536 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
079c527f
JJ
2537
2538 if (post_shift != 1)
2539 {
2540 append_pattern_def_seq (stmt_vinfo, def_stmt);
2541
5deb57cb 2542 q = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2543 pattern_stmt
0d0e4a03
JJ
2544 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2545 build_int_cst (itype, post_shift - 1));
079c527f
JJ
2546 }
2547 else
2548 {
5deb57cb 2549 q = t4;
079c527f
JJ
2550 pattern_stmt = def_stmt;
2551 }
363477c0
JJ
2552 }
2553 else
2554 {
079c527f
JJ
2555 if (pre_shift >= prec || post_shift >= prec)
2556 return NULL;
2557
2558 /* t1 = oprnd0 >> pre_shift;
5deb57cb
JJ
2559 t2 = t1 h* ml;
2560 q = t2 >> post_shift; */
079c527f
JJ
2561 if (pre_shift)
2562 {
2563 t1 = vect_recog_temp_ssa_var (itype, NULL);
2564 def_stmt
0d0e4a03
JJ
2565 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2566 build_int_cst (NULL, pre_shift));
079c527f
JJ
2567 append_pattern_def_seq (stmt_vinfo, def_stmt);
2568 }
2569 else
2570 t1 = oprnd0;
363477c0 2571
5deb57cb 2572 t2 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2573 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2574 build_int_cst (itype, ml));
079c527f 2575
5deb57cb
JJ
2576 if (post_shift)
2577 {
2578 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f 2579
5deb57cb
JJ
2580 q = vect_recog_temp_ssa_var (itype, NULL);
2581 def_stmt
0d0e4a03
JJ
2582 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2583 build_int_cst (itype, post_shift));
5deb57cb
JJ
2584 }
2585 else
2586 q = t2;
2587
2588 pattern_stmt = def_stmt;
079c527f
JJ
2589 }
2590 }
2591 else
2592 {
2593 unsigned HOST_WIDE_INT ml;
4ee4c52c 2594 int post_shift;
6b58915b 2595 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
079c527f
JJ
2596 unsigned HOST_WIDE_INT abs_d;
2597 bool add = false;
5deb57cb 2598 tree t1, t2, t3, t4;
079c527f
JJ
2599
2600 /* Give up for -1. */
2601 if (d == -1)
2602 return NULL;
2603
079c527f
JJ
2604 /* Since d might be INT_MIN, we have to cast to
2605 unsigned HOST_WIDE_INT before negating to avoid
2606 undefined signed overflow. */
2607 abs_d = (d >= 0
2608 ? (unsigned HOST_WIDE_INT) d
2609 : - (unsigned HOST_WIDE_INT) d);
2610
2611 /* n rem d = n rem -d */
2612 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2613 {
2614 d = abs_d;
2615 oprnd1 = build_int_cst (itype, abs_d);
2616 }
2617 else if (HOST_BITS_PER_WIDE_INT >= prec
2618 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2619 /* This case is not handled correctly below. */
2620 return NULL;
2621
4ee4c52c 2622 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
079c527f
JJ
2623 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2624 {
2625 add = true;
2626 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2627 }
2628 if (post_shift >= prec)
2629 return NULL;
2630
7abed779 2631 /* t1 = oprnd0 h* ml; */
5deb57cb 2632 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2633 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2634 build_int_cst (itype, ml));
079c527f
JJ
2635
2636 if (add)
2637 {
5deb57cb 2638 /* t2 = t1 + oprnd0; */
7abed779 2639 append_pattern_def_seq (stmt_vinfo, def_stmt);
5deb57cb 2640 t2 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 2641 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
079c527f
JJ
2642 }
2643 else
5deb57cb 2644 t2 = t1;
079c527f 2645
5deb57cb 2646 if (post_shift)
079c527f 2647 {
5deb57cb 2648 /* t3 = t2 >> post_shift; */
7abed779 2649 append_pattern_def_seq (stmt_vinfo, def_stmt);
5deb57cb 2650 t3 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2651 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2652 build_int_cst (itype, post_shift));
363477c0 2653 }
079c527f 2654 else
5deb57cb 2655 t3 = t2;
079c527f 2656
807e902e 2657 wide_int oprnd0_min, oprnd0_max;
7abed779
JJ
2658 int msb = 1;
2659 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2660 {
807e902e 2661 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
7abed779 2662 msb = 0;
807e902e 2663 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
7abed779
JJ
2664 msb = -1;
2665 }
079c527f 2666
7abed779
JJ
2667 if (msb == 0 && d >= 0)
2668 {
2669 /* q = t3; */
2670 q = t3;
2671 pattern_stmt = def_stmt;
2672 }
2673 else
2674 {
2675 /* t4 = oprnd0 >> (prec - 1);
2676 or if we know from VRP that oprnd0 >= 0
2677 t4 = 0;
2678 or if we know from VRP that oprnd0 < 0
2679 t4 = -1; */
2680 append_pattern_def_seq (stmt_vinfo, def_stmt);
2681 t4 = vect_recog_temp_ssa_var (itype, NULL);
2682 if (msb != 1)
0d0e4a03
JJ
2683 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2684 build_int_cst (itype, msb));
7abed779 2685 else
0d0e4a03
JJ
2686 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2687 build_int_cst (itype, prec - 1));
7abed779
JJ
2688 append_pattern_def_seq (stmt_vinfo, def_stmt);
2689
2690 /* q = t3 - t4; or q = t4 - t3; */
2691 q = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2692 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2693 d < 0 ? t3 : t4);
7abed779 2694 }
079c527f
JJ
2695 }
2696
2697 if (rhs_code == TRUNC_MOD_EXPR)
2698 {
2699 tree r, t1;
2700
2701 /* We divided. Now finish by:
2702 t1 = q * oprnd1;
2703 r = oprnd0 - t1; */
2704 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2705
2706 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 2707 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
083481d8 2708 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0 2709
079c527f 2710 r = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 2711 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
363477c0
JJ
2712 }
2713
079c527f 2714 /* Pattern detected. */
73fbfcad 2715 if (dump_enabled_p ())
78c60e3d 2716 {
ccb3ad87 2717 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 2718 "vect_recog_divmod_pattern: detected: ");
ccb3ad87 2719 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 2720 dump_printf (MSG_NOTE, "\n");
78c60e3d 2721 }
363477c0 2722
9771b263 2723 stmts->safe_push (last_stmt);
363477c0
JJ
2724
2725 *type_in = vectype;
2726 *type_out = vectype;
2727 return pattern_stmt;
2728}
2729
69d2aade
JJ
2730/* Function vect_recog_mixed_size_cond_pattern
2731
2732 Try to find the following pattern:
2733
2734 type x_t, y_t;
2735 TYPE a_T, b_T, c_T;
2736 loop:
2737 S1 a_T = x_t CMP y_t ? b_T : c_T;
2738
2739 where type 'TYPE' is an integral type which has different size
bc4fb355 2740 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
69d2aade 2741 than 'type', the constants need to fit into an integer type
bc4fb355 2742 with the same width as 'type') or results of conversion from 'type'.
69d2aade
JJ
2743
2744 Input:
2745
2746 * LAST_STMT: A stmt from which the pattern search begins.
2747
2748 Output:
2749
2750 * TYPE_IN: The type of the input arguments to the pattern.
2751
2752 * TYPE_OUT: The type of the output of this pattern.
2753
2754 * Return value: A new stmt that will be used to replace the pattern.
2755 Additionally a def_stmt is added.
2756
2757 a_it = x_t CMP y_t ? b_it : c_it;
2758 a_T = (TYPE) a_it; */
2759
355fe088
TS
2760static gimple *
2761vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
69d2aade
JJ
2762 tree *type_out)
2763{
355fe088 2764 gimple *last_stmt = (*stmts)[0];
69d2aade
JJ
2765 tree cond_expr, then_clause, else_clause;
2766 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
bc4fb355 2767 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
355fe088 2768 gimple *pattern_stmt, *def_stmt;
310213d4 2769 vec_info *vinfo = stmt_vinfo->vinfo;
bc4fb355 2770 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
355fe088 2771 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
bc4fb355
IR
2772 bool promotion;
2773 tree comp_scalar_type;
69d2aade
JJ
2774
2775 if (!is_gimple_assign (last_stmt)
2776 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2777 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2778 return NULL;
2779
2780 cond_expr = gimple_assign_rhs1 (last_stmt);
2781 then_clause = gimple_assign_rhs2 (last_stmt);
2782 else_clause = gimple_assign_rhs3 (last_stmt);
2783
87aab9b2
JJ
2784 if (!COMPARISON_CLASS_P (cond_expr))
2785 return NULL;
2786
bc4fb355
IR
2787 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2788 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
87aab9b2 2789 if (comp_vectype == NULL_TREE)
69d2aade
JJ
2790 return NULL;
2791
2792 type = gimple_expr_type (last_stmt);
bc4fb355
IR
2793 if (types_compatible_p (type, comp_scalar_type)
2794 || ((TREE_CODE (then_clause) != INTEGER_CST
2795 || TREE_CODE (else_clause) != INTEGER_CST)
2796 && !INTEGRAL_TYPE_P (comp_scalar_type))
2797 || !INTEGRAL_TYPE_P (type))
2798 return NULL;
2799
2800 if ((TREE_CODE (then_clause) != INTEGER_CST
2801 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2802 &def_stmt0, &promotion))
2803 || (TREE_CODE (else_clause) != INTEGER_CST
2804 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2805 &def_stmt1, &promotion)))
2806 return NULL;
2807
2808 if (orig_type0 && orig_type1
2809 && !types_compatible_p (orig_type0, orig_type1))
2810 return NULL;
2811
2812 if (orig_type0)
2813 {
2814 if (!types_compatible_p (orig_type0, comp_scalar_type))
2815 return NULL;
2816 then_clause = gimple_assign_rhs1 (def_stmt0);
2817 itype = orig_type0;
2818 }
2819
2820 if (orig_type1)
2821 {
2822 if (!types_compatible_p (orig_type1, comp_scalar_type))
2823 return NULL;
2824 else_clause = gimple_assign_rhs1 (def_stmt1);
2825 itype = orig_type1;
2826 }
2827
69d2aade 2828
6c825cd4
DS
2829 HOST_WIDE_INT cmp_mode_size
2830 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
2831
2832 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == cmp_mode_size)
69d2aade
JJ
2833 return NULL;
2834
2835 vectype = get_vectype_for_scalar_type (type);
2836 if (vectype == NULL_TREE)
2837 return NULL;
2838
2839 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2840 return NULL;
2841
bc4fb355 2842 if (itype == NULL_TREE)
6c825cd4 2843 itype = build_nonstandard_integer_type (cmp_mode_size,
bc4fb355
IR
2844 TYPE_UNSIGNED (type));
2845
69d2aade 2846 if (itype == NULL_TREE
6c825cd4 2847 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != cmp_mode_size)
69d2aade
JJ
2848 return NULL;
2849
2850 vecitype = get_vectype_for_scalar_type (itype);
2851 if (vecitype == NULL_TREE)
2852 return NULL;
2853
2854 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2855 return NULL;
2856
6c825cd4 2857 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > cmp_mode_size)
69d2aade 2858 {
bc4fb355
IR
2859 if ((TREE_CODE (then_clause) == INTEGER_CST
2860 && !int_fits_type_p (then_clause, itype))
2861 || (TREE_CODE (else_clause) == INTEGER_CST
2862 && !int_fits_type_p (else_clause, itype)))
69d2aade
JJ
2863 return NULL;
2864 }
2865
0d0e4a03
JJ
2866 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2867 COND_EXPR, unshare_expr (cond_expr),
2868 fold_convert (itype, then_clause),
2869 fold_convert (itype, else_clause));
2870 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2871 NOP_EXPR, gimple_assign_lhs (def_stmt));
69d2aade 2872
083481d8 2873 new_pattern_def_seq (stmt_vinfo, def_stmt);
310213d4 2874 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
69d2aade
JJ
2875 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2876 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2877 *type_in = vecitype;
2878 *type_out = vectype;
2879
73fbfcad 2880 if (dump_enabled_p ())
ccb3ad87 2881 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2882 "vect_recog_mixed_size_cond_pattern: detected:\n");
f5709183 2883
69d2aade
JJ
2884 return pattern_stmt;
2885}
2886
2887
71c92d17 2888/* Helper function of vect_recog_bool_pattern. Called recursively, return
42fd8198
IE
2889 true if bool VAR can and should be optimized that way. Assume it shouldn't
2890 in case it's a result of a comparison which can be directly vectorized into
2891 a vector comparison. */
71c92d17
JJ
2892
2893static bool
310213d4 2894check_bool_pattern (tree var, vec_info *vinfo)
71c92d17 2895{
355fe088 2896 gimple *def_stmt;
71c92d17 2897 enum vect_def_type dt;
81c40241 2898 tree rhs1;
71c92d17
JJ
2899 enum tree_code rhs_code;
2900
81c40241 2901 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
71c92d17
JJ
2902 return false;
2903
2904 if (dt != vect_internal_def)
2905 return false;
2906
2907 if (!is_gimple_assign (def_stmt))
2908 return false;
2909
81c40241 2910 if (!has_single_use (var))
71c92d17
JJ
2911 return false;
2912
2913 rhs1 = gimple_assign_rhs1 (def_stmt);
2914 rhs_code = gimple_assign_rhs_code (def_stmt);
2915 switch (rhs_code)
2916 {
2917 case SSA_NAME:
310213d4 2918 return check_bool_pattern (rhs1, vinfo);
71c92d17
JJ
2919
2920 CASE_CONVERT:
2921 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2922 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2923 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2924 return false;
310213d4 2925 return check_bool_pattern (rhs1, vinfo);
71c92d17
JJ
2926
2927 case BIT_NOT_EXPR:
310213d4 2928 return check_bool_pattern (rhs1, vinfo);
71c92d17
JJ
2929
2930 case BIT_AND_EXPR:
2931 case BIT_IOR_EXPR:
2932 case BIT_XOR_EXPR:
310213d4 2933 if (!check_bool_pattern (rhs1, vinfo))
71c92d17 2934 return false;
310213d4 2935 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo);
71c92d17
JJ
2936
2937 default:
2938 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2939 {
42fd8198 2940 tree vecitype, comp_vectype, mask_type;
71c92d17 2941
2f326699
JJ
2942 /* If the comparison can throw, then is_gimple_condexpr will be
2943 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2944 if (stmt_could_throw_p (def_stmt))
2945 return false;
2946
71c92d17
JJ
2947 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2948 if (comp_vectype == NULL_TREE)
2949 return false;
2950
42fd8198
IE
2951 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
2952 if (mask_type
2953 && expand_vec_cmp_expr_p (comp_vectype, mask_type))
2954 return false;
2955
71c92d17
JJ
2956 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2957 {
ef4bddc2 2958 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
71c92d17 2959 tree itype
ab0ef706 2960 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
71c92d17
JJ
2961 vecitype = get_vectype_for_scalar_type (itype);
2962 if (vecitype == NULL_TREE)
2963 return false;
2964 }
2965 else
2966 vecitype = comp_vectype;
2967 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2968 }
2969 return false;
2970 }
2971}
2972
2973
2974/* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2975 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
363477c0 2976 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
71c92d17
JJ
2977
2978static tree
2979adjust_bool_pattern_cast (tree type, tree var)
2980{
2981 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
355fe088 2982 gimple *cast_stmt, *pattern_stmt;
71c92d17 2983
363477c0 2984 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
71c92d17 2985 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
083481d8 2986 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
0d0e4a03
JJ
2987 cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2988 NOP_EXPR, gimple_assign_lhs (pattern_stmt));
71c92d17
JJ
2989 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2990 return gimple_assign_lhs (cast_stmt);
2991}
2992
2993
2994/* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2995 recursively. VAR is an SSA_NAME that should be transformed from bool
2996 to a wider integer type, OUT_TYPE is the desired final integer type of
2997 the whole pattern, TRUEVAL should be NULL unless optimizing
2998 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2999 in the then_clause, STMTS is where statements with added pattern stmts
3000 should be pushed to. */
3001
3002static tree
3003adjust_bool_pattern (tree var, tree out_type, tree trueval,
355fe088 3004 vec<gimple *> *stmts)
71c92d17 3005{
355fe088 3006 gimple *stmt = SSA_NAME_DEF_STMT (var);
71c92d17
JJ
3007 enum tree_code rhs_code, def_rhs_code;
3008 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3009 location_t loc;
355fe088 3010 gimple *pattern_stmt, *def_stmt;
71c92d17
JJ
3011
3012 rhs1 = gimple_assign_rhs1 (stmt);
3013 rhs2 = gimple_assign_rhs2 (stmt);
3014 rhs_code = gimple_assign_rhs_code (stmt);
3015 loc = gimple_location (stmt);
3016 switch (rhs_code)
3017 {
3018 case SSA_NAME:
3019 CASE_CONVERT:
3020 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3021 itype = TREE_TYPE (irhs1);
3022 pattern_stmt
0d0e4a03
JJ
3023 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3024 SSA_NAME, irhs1);
71c92d17
JJ
3025 break;
3026
3027 case BIT_NOT_EXPR:
3028 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3029 itype = TREE_TYPE (irhs1);
3030 pattern_stmt
0d0e4a03
JJ
3031 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3032 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
71c92d17
JJ
3033 break;
3034
3035 case BIT_AND_EXPR:
3036 /* Try to optimize x = y & (a < b ? 1 : 0); into
3037 x = (a < b ? y : 0);
3038
3039 E.g. for:
3040 bool a_b, b_b, c_b;
3041 TYPE d_T;
3042
3043 S1 a_b = x1 CMP1 y1;
3044 S2 b_b = x2 CMP2 y2;
3045 S3 c_b = a_b & b_b;
3046 S4 d_T = (TYPE) c_b;
3047
3048 we would normally emit:
3049
3050 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3051 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3052 S3' c_T = a_T & b_T;
3053 S4' d_T = c_T;
3054
3055 but we can save one stmt by using the
3056 result of one of the COND_EXPRs in the other COND_EXPR and leave
3057 BIT_AND_EXPR stmt out:
3058
3059 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3060 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3061 S4' f_T = c_T;
3062
3063 At least when VEC_COND_EXPR is implemented using masks
3064 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
3065 computes the comparison masks and ands it, in one case with
3066 all ones vector, in the other case with a vector register.
3067 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3068 often more expensive. */
3069 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3070 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3071 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3072 {
3073 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3074 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3075 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3076 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3077 {
355fe088 3078 gimple *tstmt;
71c92d17
JJ
3079 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3080 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
9771b263 3081 tstmt = stmts->pop ();
71c92d17 3082 gcc_assert (tstmt == def_stmt);
9771b263 3083 stmts->quick_push (stmt);
71c92d17
JJ
3084 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3085 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
363477c0 3086 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
71c92d17
JJ
3087 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3088 return irhs2;
3089 }
3090 else
3091 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3092 goto and_ior_xor;
3093 }
3094 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3095 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3096 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3097 {
3098 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3099 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3100 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3101 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3102 {
355fe088 3103 gimple *tstmt;
71c92d17
JJ
3104 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3105 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
9771b263 3106 tstmt = stmts->pop ();
71c92d17 3107 gcc_assert (tstmt == def_stmt);
9771b263 3108 stmts->quick_push (stmt);
71c92d17
JJ
3109 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3110 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
363477c0 3111 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
71c92d17
JJ
3112 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3113 return irhs1;
3114 }
3115 else
3116 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3117 goto and_ior_xor;
3118 }
3119 /* FALLTHRU */
3120 case BIT_IOR_EXPR:
3121 case BIT_XOR_EXPR:
3122 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3123 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3124 and_ior_xor:
3125 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3126 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3127 {
3128 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3129 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3130 int out_prec = TYPE_PRECISION (out_type);
3131 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3132 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3133 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3134 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3135 else
3136 {
3137 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3138 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3139 }
3140 }
3141 itype = TREE_TYPE (irhs1);
3142 pattern_stmt
0d0e4a03
JJ
3143 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3144 rhs_code, irhs1, irhs2);
71c92d17
JJ
3145 break;
3146
3147 default:
3148 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3149 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
e6a21dd2
JJ
3150 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3151 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3152 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
71c92d17 3153 {
ef4bddc2 3154 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
71c92d17 3155 itype
ab0ef706 3156 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
71c92d17
JJ
3157 }
3158 else
3159 itype = TREE_TYPE (rhs1);
3160 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3161 if (trueval == NULL_TREE)
3162 trueval = build_int_cst (itype, 1);
3163 else
3164 gcc_checking_assert (useless_type_conversion_p (itype,
3165 TREE_TYPE (trueval)));
3166 pattern_stmt
0d0e4a03
JJ
3167 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3168 COND_EXPR, cond_expr, trueval,
3169 build_int_cst (itype, 0));
71c92d17
JJ
3170 break;
3171 }
3172
9771b263 3173 stmts->safe_push (stmt);
71c92d17
JJ
3174 gimple_set_location (pattern_stmt, loc);
3175 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3176 return gimple_assign_lhs (pattern_stmt);
3177}
3178
3179
42fd8198
IE
3180/* Return the proper type for converting bool VAR into
3181 an integer value or NULL_TREE if no such type exists.
3182 The type is chosen so that converted value has the
3183 same number of elements as VAR's vector type. */
3184
3185static tree
3186search_type_for_mask (tree var, vec_info *vinfo)
3187{
3188 gimple *def_stmt;
3189 enum vect_def_type dt;
3190 tree rhs1;
3191 enum tree_code rhs_code;
e6f5c25d 3192 tree res = NULL_TREE, res2;
42fd8198
IE
3193
3194 if (TREE_CODE (var) != SSA_NAME)
3195 return NULL_TREE;
3196
3197 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3198 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3199 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3200 return NULL_TREE;
3201
3202 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3203 return NULL_TREE;
3204
3205 if (dt != vect_internal_def)
3206 return NULL_TREE;
3207
3208 if (!is_gimple_assign (def_stmt))
3209 return NULL_TREE;
3210
3211 rhs_code = gimple_assign_rhs_code (def_stmt);
3212 rhs1 = gimple_assign_rhs1 (def_stmt);
3213
3214 switch (rhs_code)
3215 {
3216 case SSA_NAME:
3217 case BIT_NOT_EXPR:
3218 CASE_CONVERT:
3219 res = search_type_for_mask (rhs1, vinfo);
3220 break;
3221
3222 case BIT_AND_EXPR:
3223 case BIT_IOR_EXPR:
3224 case BIT_XOR_EXPR:
e6f5c25d
IE
3225 res = search_type_for_mask (rhs1, vinfo);
3226 res2 = search_type_for_mask (gimple_assign_rhs2 (def_stmt), vinfo);
3227 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3228 res = res2;
42fd8198
IE
3229 break;
3230
3231 default:
3232 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3233 {
e6f5c25d
IE
3234 tree comp_vectype, mask_type;
3235
af3cdd34
IE
3236 if (TREE_CODE (TREE_TYPE (rhs1)) == BOOLEAN_TYPE)
3237 {
3238 res = search_type_for_mask (rhs1, vinfo);
3239 res2 = search_type_for_mask (gimple_assign_rhs2 (def_stmt), vinfo);
3240 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3241 res = res2;
3242 break;
3243 }
3244
e6f5c25d
IE
3245 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3246 if (comp_vectype == NULL_TREE)
3247 return NULL_TREE;
3248
3249 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3250 if (!mask_type
3251 || !expand_vec_cmp_expr_p (comp_vectype, mask_type))
3252 return NULL_TREE;
3253
42fd8198
IE
3254 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3255 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3256 {
3257 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3258 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3259 }
3260 else
3261 res = TREE_TYPE (rhs1);
3262 }
3263 }
3264
3265 return res;
3266}
3267
3268
71c92d17
JJ
3269/* Function vect_recog_bool_pattern
3270
3271 Try to find pattern like following:
3272
3273 bool a_b, b_b, c_b, d_b, e_b;
3274 TYPE f_T;
3275 loop:
3276 S1 a_b = x1 CMP1 y1;
3277 S2 b_b = x2 CMP2 y2;
3278 S3 c_b = a_b & b_b;
3279 S4 d_b = x3 CMP3 y3;
3280 S5 e_b = c_b | d_b;
3281 S6 f_T = (TYPE) e_b;
3282
52264dbf
RB
3283 where type 'TYPE' is an integral type. Or a similar pattern
3284 ending in
3285
3286 S6 f_Y = e_b ? r_Y : s_Y;
3287
3288 as results from if-conversion of a complex condition.
71c92d17
JJ
3289
3290 Input:
3291
3292 * LAST_STMT: A stmt at the end from which the pattern
3293 search begins, i.e. cast of a bool to
3294 an integer type.
3295
3296 Output:
3297
3298 * TYPE_IN: The type of the input arguments to the pattern.
3299
3300 * TYPE_OUT: The type of the output of this pattern.
3301
3302 * Return value: A new stmt that will be used to replace the pattern.
3303
3304 Assuming size of TYPE is the same as size of all comparisons
3305 (otherwise some casts would be added where needed), the above
3306 sequence we create related pattern stmts:
3307 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3308 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3309 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3310 S5' e_T = c_T | d_T;
3311 S6' f_T = e_T;
3312
3313 Instead of the above S3' we could emit:
3314 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3315 S3' c_T = a_T | b_T;
3316 but the above is more efficient. */
3317
355fe088
TS
3318static gimple *
3319vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
71c92d17
JJ
3320 tree *type_out)
3321{
355fe088 3322 gimple *last_stmt = stmts->pop ();
71c92d17
JJ
3323 enum tree_code rhs_code;
3324 tree var, lhs, rhs, vectype;
3325 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
42fd8198 3326 stmt_vec_info new_stmt_info;
310213d4 3327 vec_info *vinfo = stmt_vinfo->vinfo;
355fe088 3328 gimple *pattern_stmt;
71c92d17
JJ
3329
3330 if (!is_gimple_assign (last_stmt))
3331 return NULL;
3332
3333 var = gimple_assign_rhs1 (last_stmt);
3334 lhs = gimple_assign_lhs (last_stmt);
3335
3336 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3337 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3338 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3339 return NULL;
3340
3341 rhs_code = gimple_assign_rhs_code (last_stmt);
3342 if (CONVERT_EXPR_CODE_P (rhs_code))
3343 {
78048b1c
JJ
3344 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3345 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
71c92d17
JJ
3346 return NULL;
3347 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3348 if (vectype == NULL_TREE)
3349 return NULL;
3350
42fd8198
IE
3351 if (check_bool_pattern (var, vinfo))
3352 {
3353 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3354 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3355 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3356 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3357 else
3358 pattern_stmt
3359 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3360 }
71c92d17 3361 else
42fd8198
IE
3362 {
3363 tree type = search_type_for_mask (var, vinfo);
a414c77f 3364 tree cst0, cst1, tmp;
42fd8198
IE
3365
3366 if (!type)
3367 return NULL;
3368
3369 /* We may directly use cond with narrowed type to avoid
3370 multiple cond exprs with following result packing and
3371 perform single cond with packed mask instead. In case
3372 of widening we better make cond first and then extract
3373 results. */
3374 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3375 type = TREE_TYPE (lhs);
3376
3377 cst0 = build_int_cst (type, 0);
3378 cst1 = build_int_cst (type, 1);
3379 tmp = vect_recog_temp_ssa_var (type, NULL);
a414c77f 3380 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
42fd8198
IE
3381
3382 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3383 {
3384 tree new_vectype = get_vectype_for_scalar_type (type);
3385 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3386 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3387 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3388 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3389
3390 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3391 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3392 }
3393 }
3394
71c92d17
JJ
3395 *type_out = vectype;
3396 *type_in = vectype;
9771b263 3397 stmts->safe_push (last_stmt);
73fbfcad 3398 if (dump_enabled_p ())
ccb3ad87 3399 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3400 "vect_recog_bool_pattern: detected:\n");
f5709183 3401
52264dbf
RB
3402 return pattern_stmt;
3403 }
3404 else if (rhs_code == COND_EXPR
3405 && TREE_CODE (var) == SSA_NAME)
3406 {
3407 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3408 if (vectype == NULL_TREE)
3409 return NULL;
3410
3411 /* Build a scalar type for the boolean result that when
3412 vectorized matches the vector type of the result in
3413 size and number of elements. */
3414 unsigned prec
3415 = wi::udiv_trunc (TYPE_SIZE (vectype),
3416 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3417 tree type
3418 = build_nonstandard_integer_type (prec,
3419 TYPE_UNSIGNED (TREE_TYPE (var)));
3420 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3421 return NULL;
3422
a414c77f
IE
3423 if (!check_bool_pattern (var, vinfo))
3424 return NULL;
3425
3426 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
52264dbf 3427
52264dbf
RB
3428 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3429 pattern_stmt
a414c77f
IE
3430 = gimple_build_assign (lhs, COND_EXPR,
3431 build2 (NE_EXPR, boolean_type_node,
3432 rhs, build_int_cst (type, 0)),
0d0e4a03
JJ
3433 gimple_assign_rhs2 (last_stmt),
3434 gimple_assign_rhs3 (last_stmt));
52264dbf
RB
3435 *type_out = vectype;
3436 *type_in = vectype;
3437 stmts->safe_push (last_stmt);
3438 if (dump_enabled_p ())
3439 dump_printf_loc (MSG_NOTE, vect_location,
3440 "vect_recog_bool_pattern: detected:\n");
3441
71c92d17
JJ
3442 return pattern_stmt;
3443 }
ab0ef706
JJ
3444 else if (rhs_code == SSA_NAME
3445 && STMT_VINFO_DATA_REF (stmt_vinfo))
3446 {
3447 stmt_vec_info pattern_stmt_info;
3448 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3449 gcc_assert (vectype != NULL_TREE);
78336739
JJ
3450 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3451 return NULL;
ab0ef706 3452
42fd8198
IE
3453 if (check_bool_pattern (var, vinfo))
3454 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype),
3455 NULL_TREE, stmts);
3456 else
3457 {
3458 tree type = search_type_for_mask (var, vinfo);
a414c77f 3459 tree cst0, cst1, new_vectype;
42fd8198
IE
3460
3461 if (!type)
3462 return NULL;
3463
3464 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3465 type = TREE_TYPE (vectype);
3466
3467 cst0 = build_int_cst (type, 0);
3468 cst1 = build_int_cst (type, 1);
3469 new_vectype = get_vectype_for_scalar_type (type);
3470
3471 rhs = vect_recog_temp_ssa_var (type, NULL);
a414c77f 3472 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
42fd8198
IE
3473
3474 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3475 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3476 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3477 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3478 }
3479
ab0ef706
JJ
3480 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3481 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3482 {
3483 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
355fe088 3484 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
42fd8198 3485 append_pattern_def_seq (stmt_vinfo, cast_stmt);
ab0ef706
JJ
3486 rhs = rhs2;
3487 }
0d0e4a03 3488 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
310213d4 3489 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
ab0ef706
JJ
3490 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3491 STMT_VINFO_DATA_REF (pattern_stmt_info)
3492 = STMT_VINFO_DATA_REF (stmt_vinfo);
3493 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3494 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3495 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3496 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3497 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3498 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3499 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3500 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
78048b1c 3501 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
ab0ef706
JJ
3502 *type_out = vectype;
3503 *type_in = vectype;
9771b263 3504 stmts->safe_push (last_stmt);
73fbfcad 3505 if (dump_enabled_p ())
ccb3ad87 3506 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3507 "vect_recog_bool_pattern: detected:\n");
ab0ef706
JJ
3508 return pattern_stmt;
3509 }
71c92d17
JJ
3510 else
3511 return NULL;
3512}
3513
3514
e6f5c25d
IE
3515/* A helper for vect_recog_mask_conversion_pattern. Build
3516 conversion of MASK to a type suitable for masking VECTYPE.
3517 Built statement gets required vectype and is appended to
3518 a pattern sequence of STMT_VINFO.
3519
3520 Return converted mask. */
3521
3522static tree
3523build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3524 vec_info *vinfo)
3525{
3526 gimple *stmt;
3527 tree masktype, tmp;
3528 stmt_vec_info new_stmt_info;
3529
3530 masktype = build_same_sized_truth_vector_type (vectype);
3531 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3532 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3533 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3534 set_vinfo_for_stmt (stmt, new_stmt_info);
3535 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3536 append_pattern_def_seq (stmt_vinfo, stmt);
3537
3538 return tmp;
3539}
3540
3541
3542/* Function vect_recog_mask_conversion_pattern
3543
3544 Try to find statements which require boolean type
3545 converison. Additional conversion statements are
3546 added to handle such cases. For example:
3547
3548 bool m_1, m_2, m_3;
3549 int i_4, i_5;
3550 double d_6, d_7;
3551 char c_1, c_2, c_3;
3552
3553 S1 m_1 = i_4 > i_5;
3554 S2 m_2 = d_6 < d_7;
3555 S3 m_3 = m_1 & m_2;
3556 S4 c_1 = m_3 ? c_2 : c_3;
3557
3558 Will be transformed into:
3559
3560 S1 m_1 = i_4 > i_5;
3561 S2 m_2 = d_6 < d_7;
3562 S3'' m_2' = (_Bool[bitsize=32])m_2
3563 S3' m_3' = m_1 & m_2';
3564 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3565 S4' c_1' = m_3'' ? c_2 : c_3; */
3566
3567static gimple *
3568vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
3569 tree *type_out)
3570{
3571 gimple *last_stmt = stmts->pop ();
3572 enum tree_code rhs_code;
3573 tree lhs, rhs1, rhs2, tmp, rhs1_type, rhs2_type, vectype1, vectype2;
3574 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3575 stmt_vec_info pattern_stmt_info;
3576 vec_info *vinfo = stmt_vinfo->vinfo;
3577 gimple *pattern_stmt;
3578
3579 /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion. */
3580 if (is_gimple_call (last_stmt)
3581 && gimple_call_internal_p (last_stmt)
3582 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3583 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3584 {
3585 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3586
3587 if (load)
3588 {
3589 lhs = gimple_call_lhs (last_stmt);
3590 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3591 }
3592 else
3593 {
3594 rhs2 = gimple_call_arg (last_stmt, 3);
3595 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3596 }
3597
3598 rhs1 = gimple_call_arg (last_stmt, 2);
3599 rhs1_type = search_type_for_mask (rhs1, vinfo);
3600 if (!rhs1_type)
3601 return NULL;
3602 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3603
3604 if (!vectype1 || !vectype2
3605 || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
3606 return NULL;
3607
3608 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3609
3610 if (load)
3611 {
3612 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3613 pattern_stmt
3614 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
3615 gimple_call_arg (last_stmt, 0),
3616 gimple_call_arg (last_stmt, 1),
3617 tmp);
3618 gimple_call_set_lhs (pattern_stmt, lhs);
3619 }
3620 else
3621 pattern_stmt
3622 = gimple_build_call_internal (IFN_MASK_STORE, 4,
3623 gimple_call_arg (last_stmt, 0),
3624 gimple_call_arg (last_stmt, 1),
3625 tmp,
3626 gimple_call_arg (last_stmt, 3));
3627
3628
3629 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3630 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3631 STMT_VINFO_DATA_REF (pattern_stmt_info)
3632 = STMT_VINFO_DATA_REF (stmt_vinfo);
3633 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3634 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3635 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3636 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3637 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3638 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3639 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3640 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3641 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3642
3643 *type_out = vectype1;
3644 *type_in = vectype1;
3645 stmts->safe_push (last_stmt);
3646 if (dump_enabled_p ())
3647 dump_printf_loc (MSG_NOTE, vect_location,
3648 "vect_recog_mask_conversion_pattern: detected:\n");
3649
3650 return pattern_stmt;
3651 }
3652
3653 if (!is_gimple_assign (last_stmt))
3654 return NULL;
3655
3656 lhs = gimple_assign_lhs (last_stmt);
3657 rhs1 = gimple_assign_rhs1 (last_stmt);
3658 rhs_code = gimple_assign_rhs_code (last_stmt);
3659
3660 /* Check for cond expression requiring mask conversion. */
3661 if (rhs_code == COND_EXPR)
3662 {
3663 /* vect_recog_mixed_size_cond_pattern could apply.
3664 Do nothing then. */
3665 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
3666 return NULL;
3667
3668 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3669
3670 if (TREE_CODE (rhs1) == SSA_NAME)
3671 {
3672 rhs1_type = search_type_for_mask (rhs1, vinfo);
3673 if (!rhs1_type)
3674 return NULL;
3675 }
3676 else
3677 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
3678
3679 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3680
3681 if (!vectype1 || !vectype2
3682 || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
3683 return NULL;
3684
3685 /* If rhs1 is a comparison we need to move it into a
3686 separate statement. */
3687 if (TREE_CODE (rhs1) != SSA_NAME)
3688 {
3689 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
3690 pattern_stmt = gimple_build_assign (tmp, rhs1);
3691 rhs1 = tmp;
3692
3693 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3694 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3695 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
3696 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3697 }
3698
3699 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3700
3701 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3702 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
3703 gimple_assign_rhs2 (last_stmt),
3704 gimple_assign_rhs3 (last_stmt));
3705
3706 *type_out = vectype1;
3707 *type_in = vectype1;
3708 stmts->safe_push (last_stmt);
3709 if (dump_enabled_p ())
3710 dump_printf_loc (MSG_NOTE, vect_location,
3711 "vect_recog_mask_conversion_pattern: detected:\n");
3712
3713 return pattern_stmt;
3714 }
3715
3716 /* Now check for binary boolean operations requiring conversion for
3717 one of operands. */
3718 if (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE)
3719 return NULL;
3720
3721 if (rhs_code != BIT_IOR_EXPR
3722 && rhs_code != BIT_XOR_EXPR
3723 && rhs_code != BIT_AND_EXPR)
3724 return NULL;
3725
3726 rhs2 = gimple_assign_rhs2 (last_stmt);
3727
3728 rhs1_type = search_type_for_mask (rhs1, vinfo);
3729 rhs2_type = search_type_for_mask (rhs2, vinfo);
3730
3731 if (!rhs1_type || !rhs2_type
3732 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
3733 return NULL;
3734
3735 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
3736 {
3737 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
3738 if (!vectype1)
3739 return NULL;
3740 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
3741 }
3742 else
3743 {
3744 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
3745 if (!vectype1)
3746 return NULL;
3747 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3748 }
3749
3750 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3751 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
3752
3753 *type_out = vectype1;
3754 *type_in = vectype1;
3755 stmts->safe_push (last_stmt);
3756 if (dump_enabled_p ())
3757 dump_printf_loc (MSG_NOTE, vect_location,
3758 "vect_recog_mask_conversion_pattern: detected:\n");
3759
3760 return pattern_stmt;
3761}
3762
3763
1107f3ae
IR
3764/* Mark statements that are involved in a pattern. */
3765
3766static inline void
355fe088 3767vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
1107f3ae
IR
3768 tree pattern_vectype)
3769{
3770 stmt_vec_info pattern_stmt_info, def_stmt_info;
3771 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
310213d4 3772 vec_info *vinfo = orig_stmt_info->vinfo;
355fe088 3773 gimple *def_stmt;
1107f3ae 3774
1107f3ae 3775 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
ab0ef706
JJ
3776 if (pattern_stmt_info == NULL)
3777 {
310213d4 3778 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
ab0ef706
JJ
3779 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3780 }
3781 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
1107f3ae
IR
3782
3783 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3784 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
ab0ef706 3785 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
1107f3ae
IR
3786 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3787 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3788 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
363477c0
JJ
3789 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3790 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3791 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
1107f3ae 3792 {
363477c0
JJ
3793 gimple_stmt_iterator si;
3794 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3795 !gsi_end_p (si); gsi_next (&si))
69d2aade 3796 {
363477c0
JJ
3797 def_stmt = gsi_stmt (si);
3798 def_stmt_info = vinfo_for_stmt (def_stmt);
3799 if (def_stmt_info == NULL)
3800 {
310213d4 3801 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
363477c0
JJ
3802 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3803 }
3804 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3805 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
56f8faae 3806 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
363477c0
JJ
3807 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3808 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
69d2aade 3809 }
1107f3ae
IR
3810 }
3811}
3812
b8698a0f 3813/* Function vect_pattern_recog_1
20f06221
DN
3814
3815 Input:
3816 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3817 computation pattern.
3818 STMT: A stmt from which the pattern search should start.
3819
3820 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
b8698a0f
L
3821 expression that computes the same functionality and can be used to
3822 replace the sequence of stmts that are involved in the pattern.
20f06221
DN
3823
3824 Output:
b8698a0f
L
3825 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3826 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3827 relevant vector type. If 'TYPE_IN' is already a vector type, then this
20f06221
DN
3828 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3829 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3830 to the available target pattern.
3831
b8698a0f 3832 This function also does some bookkeeping, as explained in the documentation
20f06221
DN
3833 for vect_recog_pattern. */
3834
0bee0ef9
RB
3835static bool
3836vect_pattern_recog_1 (vect_recog_func *recog_func,
92aea285 3837 gimple_stmt_iterator si,
355fe088 3838 vec<gimple *> *stmts_to_replace)
20f06221 3839{
355fe088 3840 gimple *stmt = gsi_stmt (si), *pattern_stmt;
383d9c83 3841 stmt_vec_info stmt_info;
383d9c83 3842 loop_vec_info loop_vinfo;
20f06221
DN
3843 tree pattern_vectype;
3844 tree type_in, type_out;
20f06221 3845 enum tree_code code;
b5aeb3bb 3846 int i;
355fe088 3847 gimple *next;
20f06221 3848
9771b263
DN
3849 stmts_to_replace->truncate (0);
3850 stmts_to_replace->quick_push (stmt);
0bee0ef9 3851 pattern_stmt = recog_func->fn (stmts_to_replace, &type_in, &type_out);
726a989a 3852 if (!pattern_stmt)
0bee0ef9 3853 return false;
b8698a0f 3854
9771b263 3855 stmt = stmts_to_replace->last ();
383d9c83
IR
3856 stmt_info = vinfo_for_stmt (stmt);
3857 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3858
e6f5c25d
IE
3859 if (VECTOR_BOOLEAN_TYPE_P (type_in)
3860 || VECTOR_MODE_P (TYPE_MODE (type_in)))
b8698a0f
L
3861 {
3862 /* No need to check target support (already checked by the pattern
3863 recognition function). */
b690cc0f 3864 pattern_vectype = type_out ? type_out : type_in;
20f06221
DN
3865 }
3866 else
3867 {
ef4bddc2 3868 machine_mode vec_mode;
20f06221
DN
3869 enum insn_code icode;
3870 optab optab;
3871
3872 /* Check target support */
b690cc0f
RG
3873 type_in = get_vectype_for_scalar_type (type_in);
3874 if (!type_in)
0bee0ef9 3875 return false;
b690cc0f
RG
3876 if (type_out)
3877 type_out = get_vectype_for_scalar_type (type_out);
3878 else
3879 type_out = type_in;
15bbc165 3880 if (!type_out)
0bee0ef9 3881 return false;
b690cc0f 3882 pattern_vectype = type_out;
03d3e953 3883
726a989a
RB
3884 if (is_gimple_assign (pattern_stmt))
3885 code = gimple_assign_rhs_code (pattern_stmt);
3886 else
3887 {
3888 gcc_assert (is_gimple_call (pattern_stmt));
3889 code = CALL_EXPR;
3890 }
3891
b690cc0f
RG
3892 optab = optab_for_tree_code (code, type_in, optab_default);
3893 vec_mode = TYPE_MODE (type_in);
20f06221 3894 if (!optab
947131ba 3895 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
b690cc0f 3896 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
0bee0ef9 3897 return false;
20f06221
DN
3898 }
3899
3900 /* Found a vectorizable pattern. */
73fbfcad 3901 if (dump_enabled_p ())
20f06221 3902 {
ccb3ad87 3903 dump_printf_loc (MSG_NOTE, vect_location,
0bee0ef9 3904 "%s pattern recognized: ", recog_func->name);
ccb3ad87 3905 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
20f06221 3906 }
b8698a0f 3907
726a989a 3908 /* Mark the stmts that are involved in the pattern. */
1107f3ae 3909 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
20f06221 3910
b5aeb3bb
IR
3911 /* Patterns cannot be vectorized using SLP, because they change the order of
3912 computation. */
f5709183 3913 if (loop_vinfo)
9771b263 3914 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
f5709183 3915 if (next == stmt)
9771b263 3916 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
51312233 3917
1107f3ae
IR
3918 /* It is possible that additional pattern stmts are created and inserted in
3919 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3920 relevant statements. */
9771b263
DN
3921 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3922 && (unsigned) i < (stmts_to_replace->length () - 1);
51312233
IR
3923 i++)
3924 {
3925 stmt_info = vinfo_for_stmt (stmt);
3926 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
73fbfcad 3927 if (dump_enabled_p ())
51312233 3928 {
ccb3ad87 3929 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 3930 "additional pattern stmt: ");
ccb3ad87 3931 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
51312233
IR
3932 }
3933
1107f3ae 3934 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
51312233 3935 }
0bee0ef9
RB
3936
3937 return true;
20f06221
DN
3938}
3939
3940
3941/* Function vect_pattern_recog
3942
3943 Input:
3944 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3945 computation idioms.
3946
9d5e7640
IR
3947 Output - for each computation idiom that is detected we create a new stmt
3948 that provides the same functionality and that can be vectorized. We
20f06221
DN
3949 also record some information in the struct_stmt_info of the relevant
3950 stmts, as explained below:
3951
3952 At the entry to this function we have the following stmts, with the
3953 following initial value in the STMT_VINFO fields:
3954
3955 stmt in_pattern_p related_stmt vec_stmt
3956 S1: a_i = .... - - -
3957 S2: a_2 = ..use(a_i).. - - -
3958 S3: a_1 = ..use(a_2).. - - -
3959 S4: a_0 = ..use(a_1).. - - -
3960 S5: ... = ..use(a_0).. - - -
3961
3962 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
9d5e7640
IR
3963 represented by a single stmt. We then:
3964 - create a new stmt S6 equivalent to the pattern (the stmt is not
3965 inserted into the code)
20f06221
DN
3966 - fill in the STMT_VINFO fields as follows:
3967
3968 in_pattern_p related_stmt vec_stmt
b8698a0f 3969 S1: a_i = .... - - -
20f06221
DN
3970 S2: a_2 = ..use(a_i).. - - -
3971 S3: a_1 = ..use(a_2).. - - -
20f06221 3972 S4: a_0 = ..use(a_1).. true S6 -
9d5e7640 3973 '---> S6: a_new = .... - S4 -
20f06221
DN
3974 S5: ... = ..use(a_0).. - - -
3975
3976 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
9d5e7640 3977 to each other through the RELATED_STMT field).
20f06221
DN
3978
3979 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3980 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3981 remain irrelevant unless used by stmts other than S4.
3982
3983 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
9d5e7640 3984 (because they are marked as irrelevant). It will vectorize S6, and record
83197f37
IR
3985 a pointer to the new vector stmt VS6 from S6 (as usual).
3986 S4 will be skipped, and S5 will be vectorized as usual:
20f06221
DN
3987
3988 in_pattern_p related_stmt vec_stmt
3989 S1: a_i = .... - - -
3990 S2: a_2 = ..use(a_i).. - - -
3991 S3: a_1 = ..use(a_2).. - - -
3992 > VS6: va_new = .... - - -
20f06221 3993 S4: a_0 = ..use(a_1).. true S6 VS6
9d5e7640 3994 '---> S6: a_new = .... - S4 VS6
20f06221
DN
3995 > VS5: ... = ..vuse(va_new).. - - -
3996 S5: ... = ..use(a_0).. - - -
3997
9d5e7640 3998 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
20f06221
DN
3999 elsewhere), and we'll end up with:
4000
b8698a0f 4001 VS6: va_new = ....
83197f37
IR
4002 VS5: ... = ..vuse(va_new)..
4003
4004 In case of more than one pattern statements, e.g., widen-mult with
4005 intermediate type:
4006
4007 S1 a_t = ;
4008 S2 a_T = (TYPE) a_t;
4009 '--> S3: a_it = (interm_type) a_t;
4010 S4 prod_T = a_T * CONST;
4011 '--> S5: prod_T' = a_it w* CONST;
4012
4013 there may be other users of a_T outside the pattern. In that case S2 will
4014 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4015 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4016 be recorded in S3. */
20f06221
DN
4017
4018void
310213d4 4019vect_pattern_recog (vec_info *vinfo)
20f06221 4020{
f5709183 4021 struct loop *loop;
772e61e1 4022 basic_block *bbs;
f5709183 4023 unsigned int nbbs;
726a989a 4024 gimple_stmt_iterator si;
20f06221 4025 unsigned int i, j;
355fe088
TS
4026 auto_vec<gimple *, 1> stmts_to_replace;
4027 gimple *stmt;
20f06221 4028
73fbfcad 4029 if (dump_enabled_p ())
78c60e3d 4030 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4031 "=== vect_pattern_recog ===\n");
20f06221 4032
310213d4 4033 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
f5709183
IR
4034 {
4035 loop = LOOP_VINFO_LOOP (loop_vinfo);
4036 bbs = LOOP_VINFO_BBS (loop_vinfo);
4037 nbbs = loop->num_nodes;
61d371eb
RB
4038
4039 /* Scan through the loop stmts, applying the pattern recognition
4040 functions starting at each stmt visited: */
4041 for (i = 0; i < nbbs; i++)
4042 {
4043 basic_block bb = bbs[i];
4044 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4045 {
4046 /* Scan over all generic vect_recog_xxx_pattern functions. */
4047 for (j = 0; j < NUM_PATTERNS; j++)
0bee0ef9
RB
4048 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4049 &stmts_to_replace))
4050 break;
61d371eb
RB
4051 }
4052 }
f5709183
IR
4053 }
4054 else
4055 {
61d371eb
RB
4056 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4057 for (si = bb_vinfo->region_begin;
4058 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4059 {
4060 if ((stmt = gsi_stmt (si))
f5709183
IR
4061 && vinfo_for_stmt (stmt)
4062 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
61d371eb 4063 continue;
f5709183 4064
61d371eb
RB
4065 /* Scan over all generic vect_recog_xxx_pattern functions. */
4066 for (j = 0; j < NUM_PATTERNS; j++)
0bee0ef9
RB
4067 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4068 &stmts_to_replace))
4069 break;
61d371eb 4070 }
20f06221
DN
4071 }
4072}