]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-patterns.c
re PR tree-optimization/68639 (ICE: Floating point exception)
[thirdparty/gcc.git] / gcc / tree-vect-patterns.c
CommitLineData
20f06221 1/* Analysis Utilities for Loop Vectorization.
5624e564 2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
20f06221
DN
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9dcd6f09 9Software Foundation; either version 3, or (at your option) any later
20f06221
DN
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
9dcd6f09
NC
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20f06221
DN
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
c7131fb2 24#include "backend.h"
957060b5 25#include "rtl.h"
20f06221 26#include "tree.h"
c7131fb2 27#include "gimple.h"
c7131fb2 28#include "ssa.h"
957060b5
AM
29#include "expmed.h"
30#include "optabs-tree.h"
31#include "insn-config.h"
32#include "recog.h" /* FIXME: for insn_data */
40e23961 33#include "fold-const.h"
d8a2d370 34#include "stor-layout.h"
2fb9a547 35#include "tree-eh.h"
45b0be94 36#include "gimplify.h"
5be5c238 37#include "gimple-iterator.h"
20f06221 38#include "cfgloop.h"
20f06221 39#include "tree-vectorizer.h"
7ee2468b 40#include "dumpfile.h"
9b2b7279 41#include "builtins.h"
b4e5bc47 42#include "internal-fn.h"
7a31e5ef 43#include "case-cfn-macros.h"
20f06221 44
20f06221 45/* Pattern recognition functions */
355fe088 46static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
51312233 47 tree *);
355fe088 48static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
51312233 49 tree *);
355fe088 50static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
51312233 51 tree *);
355fe088 52static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
79d652a5 53 tree *);
355fe088
TS
54static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
55static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
1107f3ae 56 tree *);
355fe088 57static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
36ba4aae 58 tree *, tree *);
355fe088
TS
59static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
60static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
732a0ad3 61 tree *, tree *);
355fe088 62static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
079c527f 63 tree *, tree *);
47486460 64
355fe088 65static gimple *vect_recog_mult_pattern (vec<gimple *> *,
47486460
VK
66 tree *, tree *);
67
355fe088 68static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
69d2aade 69 tree *, tree *);
355fe088 70static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
e6f5c25d 71static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *, tree *);
20f06221
DN
72static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
73 vect_recog_widen_mult_pattern,
74 vect_recog_widen_sum_pattern,
0b2229b0 75 vect_recog_dot_prod_pattern,
79d652a5 76 vect_recog_sad_pattern,
1107f3ae 77 vect_recog_pow_pattern,
36ba4aae 78 vect_recog_widen_shift_pattern,
33018845 79 vect_recog_over_widening_pattern,
7e9a3abb 80 vect_recog_rotate_pattern,
732a0ad3 81 vect_recog_vector_vector_shift_pattern,
079c527f 82 vect_recog_divmod_pattern,
47486460 83 vect_recog_mult_pattern,
71c92d17 84 vect_recog_mixed_size_cond_pattern,
e6f5c25d
IE
85 vect_recog_bool_pattern,
86 vect_recog_mask_conversion_pattern};
20f06221 87
083481d8 88static inline void
355fe088 89append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
083481d8 90{
a1a6c5b2
JJ
91 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
92 stmt);
083481d8
JJ
93}
94
95static inline void
355fe088 96new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
083481d8
JJ
97{
98 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
99 append_pattern_def_seq (stmt_info, stmt);
100}
101
f71cf56a
UW
102/* Check whether STMT2 is in the same loop or basic block as STMT1.
103 Which of the two applies depends on whether we're currently doing
104 loop-based or basic-block-based vectorization, as determined by
105 the vinfo_for_stmt for STMT1 (which must be defined).
106
107 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
108 to be defined as well. */
109
110static bool
355fe088 111vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
f71cf56a
UW
112{
113 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
61d371eb 114 return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
f71cf56a
UW
115}
116
9a7a4398
UW
117/* If the LHS of DEF_STMT has a single use, and that statement is
118 in the same loop or basic block, return it. */
119
355fe088
TS
120static gimple *
121vect_single_imm_use (gimple *def_stmt)
9a7a4398
UW
122{
123 tree lhs = gimple_assign_lhs (def_stmt);
124 use_operand_p use_p;
355fe088 125 gimple *use_stmt;
9a7a4398
UW
126
127 if (!single_imm_use (lhs, &use_p, &use_stmt))
128 return NULL;
129
130 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
131 return NULL;
132
133 return use_stmt;
134}
135
bc4fb355 136/* Check whether NAME, an ssa-name used in USE_STMT,
79d652a5 137 is a result of a type promotion, such that:
20f06221 138 DEF_STMT: NAME = NOP (name0)
383d9c83
IR
139 If CHECK_SIGN is TRUE, check that either both types are signed or both are
140 unsigned. */
20f06221
DN
141
142static bool
355fe088
TS
143type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
144 tree *orig_type, gimple **def_stmt, bool *promotion)
20f06221 145{
355fe088 146 gimple *dummy_gimple;
20f06221 147 stmt_vec_info stmt_vinfo;
20f06221
DN
148 tree type = TREE_TYPE (name);
149 tree oprnd0;
150 enum vect_def_type dt;
20f06221
DN
151
152 stmt_vinfo = vinfo_for_stmt (use_stmt);
81c40241 153 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
20f06221
DN
154 return false;
155
8644a673
IR
156 if (dt != vect_internal_def
157 && dt != vect_external_def && dt != vect_constant_def)
20f06221
DN
158 return false;
159
bc4fb355 160 if (!*def_stmt)
20f06221
DN
161 return false;
162
726a989a 163 if (!is_gimple_assign (*def_stmt))
20f06221
DN
164 return false;
165
bc4fb355 166 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
20f06221
DN
167 return false;
168
726a989a 169 oprnd0 = gimple_assign_rhs1 (*def_stmt);
20f06221 170
bc4fb355
IR
171 *orig_type = TREE_TYPE (oprnd0);
172 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
173 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
174 return false;
175
176 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
177 *promotion = true;
bc4fb355 178 else
79d652a5 179 *promotion = false;
20f06221 180
81c40241 181 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
20f06221
DN
182 return false;
183
20f06221
DN
184 return true;
185}
186
726a989a
RB
187/* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
188 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
189
190static tree
355fe088 191vect_recog_temp_ssa_var (tree type, gimple *stmt)
726a989a 192{
83d5977e 193 return make_temp_ssa_name (type, stmt, "patt");
726a989a 194}
20f06221
DN
195
196/* Function vect_recog_dot_prod_pattern
197
198 Try to find the following pattern:
199
200 type x_t, y_t;
201 TYPE1 prod;
202 TYPE2 sum = init;
203 loop:
204 sum_0 = phi <init, sum_1>
205 S1 x_t = ...
206 S2 y_t = ...
207 S3 x_T = (TYPE1) x_t;
208 S4 y_T = (TYPE1) y_t;
209 S5 prod = x_T * y_T;
210 [S6 prod = (TYPE2) prod; #optional]
211 S7 sum_1 = prod + sum_0;
212
b8698a0f
L
213 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
214 same size of 'TYPE1' or bigger. This is a special case of a reduction
20f06221 215 computation.
b8698a0f 216
20f06221
DN
217 Input:
218
51312233
IR
219 * STMTS: Contains a stmt from which the pattern search begins. In the
220 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
221 will be detected.
20f06221
DN
222
223 Output:
224
225 * TYPE_IN: The type of the input arguments to the pattern.
226
227 * TYPE_OUT: The type of the output of this pattern.
228
229 * Return value: A new stmt that will be used to replace the sequence of
230 stmts that constitute the pattern. In this case it will be:
231 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
d29de1bf
DN
232
233 Note: The dot-prod idiom is a widening reduction pattern that is
234 vectorized without preserving all the intermediate results. It
235 produces only N/2 (widened) results (by summing up pairs of
236 intermediate results) rather than all N results. Therefore, we
237 cannot allow this pattern when we want to get all the results and in
238 the correct order (as is the case when this computation is in an
239 inner-loop nested in an outer-loop that us being vectorized). */
20f06221 240
355fe088
TS
241static gimple *
242vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
51312233 243 tree *type_out)
20f06221 244{
355fe088 245 gimple *stmt, *last_stmt = (*stmts)[0];
20f06221
DN
246 tree oprnd0, oprnd1;
247 tree oprnd00, oprnd01;
51312233 248 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
20f06221 249 tree type, half_type;
355fe088 250 gimple *pattern_stmt;
20f06221 251 tree prod_type;
d29de1bf 252 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 253 struct loop *loop;
f471fe72 254 tree var;
bc4fb355 255 bool promotion;
20f06221 256
f5709183
IR
257 if (!loop_info)
258 return NULL;
259
260 loop = LOOP_VINFO_LOOP (loop_info);
261
328dc477
RB
262 /* We don't allow changing the order of the computation in the inner-loop
263 when doing outer-loop vectorization. */
264 if (loop && nested_in_vect_loop_p (loop, last_stmt))
265 return NULL;
266
51312233 267 if (!is_gimple_assign (last_stmt))
20f06221
DN
268 return NULL;
269
51312233 270 type = gimple_expr_type (last_stmt);
20f06221 271
b8698a0f 272 /* Look for the following pattern
20f06221
DN
273 DX = (TYPE1) X;
274 DY = (TYPE1) Y;
b8698a0f 275 DPROD = DX * DY;
20f06221
DN
276 DDPROD = (TYPE2) DPROD;
277 sum_1 = DDPROD + sum_0;
b8698a0f 278 In which
20f06221
DN
279 - DX is double the size of X
280 - DY is double the size of Y
281 - DX, DY, DPROD all have the same type
282 - sum is the same size of DPROD or bigger
283 - sum has been recognized as a reduction variable.
284
285 This is equivalent to:
286 DPROD = X w* Y; #widen mult
287 sum_1 = DPROD w+ sum_0; #widen summation
288 or
289 DPROD = X w* Y; #widen mult
290 sum_1 = DPROD + sum_0; #summation
291 */
292
293 /* Starting from LAST_STMT, follow the defs of its uses in search
294 of the above pattern. */
295
51312233 296 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
20f06221
DN
297 return NULL;
298
299 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
300 {
301 /* Has been detected as widening-summation? */
302
303 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
726a989a
RB
304 type = gimple_expr_type (stmt);
305 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
20f06221 306 return NULL;
726a989a
RB
307 oprnd0 = gimple_assign_rhs1 (stmt);
308 oprnd1 = gimple_assign_rhs2 (stmt);
20f06221
DN
309 half_type = TREE_TYPE (oprnd0);
310 }
311 else
312 {
355fe088 313 gimple *def_stmt;
20f06221 314
51312233
IR
315 oprnd0 = gimple_assign_rhs1 (last_stmt);
316 oprnd1 = gimple_assign_rhs2 (last_stmt);
9600efe1
MM
317 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
318 || !types_compatible_p (TREE_TYPE (oprnd1), type))
20f06221 319 return NULL;
51312233 320 stmt = last_stmt;
20f06221 321
bc4fb355
IR
322 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
323 &promotion)
324 && promotion)
20f06221
DN
325 {
326 stmt = def_stmt;
726a989a 327 oprnd0 = gimple_assign_rhs1 (stmt);
20f06221
DN
328 }
329 else
330 half_type = type;
331 }
332
51312233 333 /* So far so good. Since last_stmt was detected as a (summation) reduction,
20f06221
DN
334 we know that oprnd1 is the reduction variable (defined by a loop-header
335 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
336 Left to check that oprnd0 is defined by a (widen_)mult_expr */
ba02d3bc
RG
337 if (TREE_CODE (oprnd0) != SSA_NAME)
338 return NULL;
20f06221
DN
339
340 prod_type = half_type;
341 stmt = SSA_NAME_DEF_STMT (oprnd0);
3cb35c12
CF
342
343 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
75264e61 344 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
3cb35c12
CF
345 return NULL;
346
b8698a0f 347 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
8665227f 348 inside the loop (in case we are analyzing an outer-loop). */
726a989a 349 if (!is_gimple_assign (stmt))
b8698a0f 350 return NULL;
20f06221
DN
351 stmt_vinfo = vinfo_for_stmt (stmt);
352 gcc_assert (stmt_vinfo);
8644a673 353 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
b3130586 354 return NULL;
726a989a 355 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
20f06221
DN
356 return NULL;
357 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
358 {
359 /* Has been detected as a widening multiplication? */
360
361 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
726a989a 362 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
20f06221
DN
363 return NULL;
364 stmt_vinfo = vinfo_for_stmt (stmt);
365 gcc_assert (stmt_vinfo);
8644a673 366 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
726a989a
RB
367 oprnd00 = gimple_assign_rhs1 (stmt);
368 oprnd01 = gimple_assign_rhs2 (stmt);
56f8faae
CH
369 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
370 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
20f06221
DN
371 }
372 else
373 {
374 tree half_type0, half_type1;
355fe088 375 gimple *def_stmt;
20f06221
DN
376 tree oprnd0, oprnd1;
377
726a989a
RB
378 oprnd0 = gimple_assign_rhs1 (stmt);
379 oprnd1 = gimple_assign_rhs2 (stmt);
9600efe1
MM
380 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
381 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
20f06221 382 return NULL;
bc4fb355
IR
383 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
384 &promotion)
385 || !promotion)
20f06221 386 return NULL;
726a989a 387 oprnd00 = gimple_assign_rhs1 (def_stmt);
181f5f3e 388 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
bc4fb355
IR
389 &promotion)
390 || !promotion)
20f06221 391 return NULL;
726a989a 392 oprnd01 = gimple_assign_rhs1 (def_stmt);
9600efe1 393 if (!types_compatible_p (half_type0, half_type1))
20f06221
DN
394 return NULL;
395 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
396 return NULL;
397 }
398
399 half_type = TREE_TYPE (oprnd00);
400 *type_in = half_type;
401 *type_out = type;
b8698a0f 402
20f06221 403 /* Pattern detected. Create a stmt to be used to replace the pattern: */
726a989a 404 var = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
405 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
406 oprnd00, oprnd01, oprnd1);
b8698a0f 407
73fbfcad 408 if (dump_enabled_p ())
20f06221 409 {
ccb3ad87 410 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 411 "vect_recog_dot_prod_pattern: detected: ");
ccb3ad87 412 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 413 dump_printf (MSG_NOTE, "\n");
20f06221 414 }
d29de1bf 415
726a989a 416 return pattern_stmt;
20f06221 417}
b8698a0f 418
51312233 419
79d652a5
CH
420/* Function vect_recog_sad_pattern
421
422 Try to find the following Sum of Absolute Difference (SAD) pattern:
423
424 type x_t, y_t;
425 signed TYPE1 diff, abs_diff;
426 TYPE2 sum = init;
427 loop:
428 sum_0 = phi <init, sum_1>
429 S1 x_t = ...
430 S2 y_t = ...
431 S3 x_T = (TYPE1) x_t;
432 S4 y_T = (TYPE1) y_t;
433 S5 diff = x_T - y_T;
434 S6 abs_diff = ABS_EXPR <diff>;
435 [S7 abs_diff = (TYPE2) abs_diff; #optional]
436 S8 sum_1 = abs_diff + sum_0;
437
438 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
439 same size of 'TYPE1' or bigger. This is a special case of a reduction
440 computation.
441
442 Input:
443
444 * STMTS: Contains a stmt from which the pattern search begins. In the
445 example, when this function is called with S8, the pattern
446 {S3,S4,S5,S6,S7,S8} will be detected.
447
448 Output:
449
450 * TYPE_IN: The type of the input arguments to the pattern.
451
452 * TYPE_OUT: The type of the output of this pattern.
453
454 * Return value: A new stmt that will be used to replace the sequence of
455 stmts that constitute the pattern. In this case it will be:
456 SAD_EXPR <x_t, y_t, sum_0>
457 */
458
355fe088
TS
459static gimple *
460vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
79d652a5
CH
461 tree *type_out)
462{
355fe088 463 gimple *last_stmt = (*stmts)[0];
79d652a5
CH
464 tree sad_oprnd0, sad_oprnd1;
465 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
466 tree half_type;
467 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
468 struct loop *loop;
469 bool promotion;
470
471 if (!loop_info)
472 return NULL;
473
474 loop = LOOP_VINFO_LOOP (loop_info);
475
328dc477
RB
476 /* We don't allow changing the order of the computation in the inner-loop
477 when doing outer-loop vectorization. */
478 if (loop && nested_in_vect_loop_p (loop, last_stmt))
479 return NULL;
480
79d652a5
CH
481 if (!is_gimple_assign (last_stmt))
482 return NULL;
483
484 tree sum_type = gimple_expr_type (last_stmt);
485
486 /* Look for the following pattern
487 DX = (TYPE1) X;
488 DY = (TYPE1) Y;
489 DDIFF = DX - DY;
490 DAD = ABS_EXPR <DDIFF>;
491 DDPROD = (TYPE2) DPROD;
492 sum_1 = DAD + sum_0;
493 In which
494 - DX is at least double the size of X
495 - DY is at least double the size of Y
496 - DX, DY, DDIFF, DAD all have the same type
497 - sum is the same size of DAD or bigger
498 - sum has been recognized as a reduction variable.
499
500 This is equivalent to:
501 DDIFF = X w- Y; #widen sub
502 DAD = ABS_EXPR <DDIFF>;
503 sum_1 = DAD w+ sum_0; #widen summation
504 or
505 DDIFF = X w- Y; #widen sub
506 DAD = ABS_EXPR <DDIFF>;
507 sum_1 = DAD + sum_0; #summation
508 */
509
510 /* Starting from LAST_STMT, follow the defs of its uses in search
511 of the above pattern. */
512
513 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
514 return NULL;
515
516 tree plus_oprnd0, plus_oprnd1;
517
518 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
519 {
520 /* Has been detected as widening-summation? */
521
355fe088 522 gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
79d652a5
CH
523 sum_type = gimple_expr_type (stmt);
524 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
525 return NULL;
526 plus_oprnd0 = gimple_assign_rhs1 (stmt);
527 plus_oprnd1 = gimple_assign_rhs2 (stmt);
528 half_type = TREE_TYPE (plus_oprnd0);
529 }
530 else
531 {
355fe088 532 gimple *def_stmt;
79d652a5 533
79d652a5
CH
534 plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
535 plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
536 if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
537 || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
538 return NULL;
539
540 /* The type conversion could be promotion, demotion,
541 or just signed -> unsigned. */
542 if (type_conversion_p (plus_oprnd0, last_stmt, false,
543 &half_type, &def_stmt, &promotion))
544 plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
545 else
546 half_type = sum_type;
547 }
548
549 /* So far so good. Since last_stmt was detected as a (summation) reduction,
550 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
551 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
552 Then check that plus_oprnd0 is defined by an abs_expr. */
553
554 if (TREE_CODE (plus_oprnd0) != SSA_NAME)
555 return NULL;
556
557 tree abs_type = half_type;
355fe088 558 gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
79d652a5
CH
559
560 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
561 if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
562 return NULL;
563
564 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
565 inside the loop (in case we are analyzing an outer-loop). */
566 if (!is_gimple_assign (abs_stmt))
567 return NULL;
568
569 stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
570 gcc_assert (abs_stmt_vinfo);
571 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
572 return NULL;
573 if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
574 return NULL;
575
576 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
577 if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
578 return NULL;
579 if (TYPE_UNSIGNED (abs_type))
580 return NULL;
581
582 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
583
584 if (TREE_CODE (abs_oprnd) != SSA_NAME)
585 return NULL;
586
355fe088 587 gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
79d652a5
CH
588
589 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
590 if (!gimple_bb (diff_stmt)
591 || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
592 return NULL;
593
594 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
595 inside the loop (in case we are analyzing an outer-loop). */
596 if (!is_gimple_assign (diff_stmt))
597 return NULL;
598
599 stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
600 gcc_assert (diff_stmt_vinfo);
601 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
602 return NULL;
603 if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
604 return NULL;
605
606 tree half_type0, half_type1;
355fe088 607 gimple *def_stmt;
79d652a5
CH
608
609 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
610 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
611
612 if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
613 || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
614 return NULL;
615 if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
616 &half_type0, &def_stmt, &promotion)
617 || !promotion)
618 return NULL;
619 sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
620
621 if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
622 &half_type1, &def_stmt, &promotion)
623 || !promotion)
624 return NULL;
625 sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
626
627 if (!types_compatible_p (half_type0, half_type1))
628 return NULL;
629 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
630 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
631 return NULL;
632
633 *type_in = TREE_TYPE (sad_oprnd0);
634 *type_out = sum_type;
635
636 /* Pattern detected. Create a stmt to be used to replace the pattern: */
637 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
355fe088
TS
638 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
639 sad_oprnd1, plus_oprnd1);
79d652a5
CH
640
641 if (dump_enabled_p ())
642 {
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "vect_recog_sad_pattern: detected: ");
645 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
646 dump_printf (MSG_NOTE, "\n");
647 }
648
79d652a5
CH
649 return pattern_stmt;
650}
651
652
36ba4aae
IR
653/* Handle widening operation by a constant. At the moment we support MULT_EXPR
654 and LSHIFT_EXPR.
655
656 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
657 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
51312233
IR
658
659 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
36ba4aae
IR
660 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
661 that satisfies the above restrictions, we can perform a widening opeartion
662 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
566d377a 663 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */
51312233
IR
664
665static bool
355fe088 666vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
36ba4aae 667 tree const_oprnd, tree *oprnd,
355fe088
TS
668 gimple **wstmt, tree type,
669 tree *half_type, gimple *def_stmt)
51312233 670{
83d5977e 671 tree new_type, new_oprnd;
51312233 672
36ba4aae
IR
673 if (code != MULT_EXPR && code != LSHIFT_EXPR)
674 return false;
675
676 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
677 || (code == LSHIFT_EXPR
678 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
679 != 1))
680 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
51312233
IR
681 {
682 /* CONST_OPRND is a constant of HALF_TYPE. */
683 *oprnd = gimple_assign_rhs1 (def_stmt);
684 return true;
685 }
686
f71cf56a
UW
687 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
688 return false;
689
690 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
51312233
IR
691 return false;
692
36ba4aae 693 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
51312233
IR
694 a type 2 times bigger than HALF_TYPE. */
695 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
696 TYPE_UNSIGNED (type));
36ba4aae
IR
697 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
698 || (code == LSHIFT_EXPR
699 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
51312233
IR
700 return false;
701
566d377a
RB
702 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
703 *oprnd = gimple_assign_rhs1 (def_stmt);
704 new_oprnd = make_ssa_name (new_type);
705 *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
706 *oprnd = new_oprnd;
51312233
IR
707
708 *half_type = new_type;
709 return true;
710}
711
712
20f06221
DN
713/* Function vect_recog_widen_mult_pattern
714
715 Try to find the following pattern:
716
d367387c
CH
717 type1 a_t;
718 type2 b_t;
20f06221
DN
719 TYPE a_T, b_T, prod_T;
720
721 S1 a_t = ;
722 S2 b_t = ;
723 S3 a_T = (TYPE) a_t;
724 S4 b_T = (TYPE) b_t;
725 S5 prod_T = a_T * b_T;
726
d367387c 727 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
20f06221 728
d47657bd 729 Also detect unsigned cases:
383d9c83 730
d367387c
CH
731 unsigned type1 a_t;
732 unsigned type2 b_t;
383d9c83
IR
733 unsigned TYPE u_prod_T;
734 TYPE a_T, b_T, prod_T;
735
736 S1 a_t = ;
737 S2 b_t = ;
738 S3 a_T = (TYPE) a_t;
739 S4 b_T = (TYPE) b_t;
740 S5 prod_T = a_T * b_T;
741 S6 u_prod_T = (unsigned TYPE) prod_T;
742
743 and multiplication by constants:
744
745 type a_t;
746 TYPE a_T, prod_T;
747
748 S1 a_t = ;
749 S3 a_T = (TYPE) a_t;
750 S5 prod_T = a_T * CONST;
751
51312233
IR
752 A special case of multiplication by constants is when 'TYPE' is 4 times
753 bigger than 'type', but CONST fits an intermediate type 2 times smaller
754 than 'TYPE'. In that case we create an additional pattern stmt for S3
755 to create a variable of the intermediate type, and perform widen-mult
756 on the intermediate type as well:
757
758 type a_t;
759 interm_type a_it;
760 TYPE a_T, prod_T, prod_T';
761
762 S1 a_t = ;
763 S3 a_T = (TYPE) a_t;
764 '--> a_it = (interm_type) a_t;
765 S5 prod_T = a_T * CONST;
766 '--> prod_T' = a_it w* CONST;
20f06221 767
51312233
IR
768 Input/Output:
769
770 * STMTS: Contains a stmt from which the pattern search begins. In the
771 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
772 is detected. In case of unsigned widen-mult, the original stmt (S5) is
773 replaced with S6 in STMTS. In case of multiplication by a constant
774 of an intermediate type (the last case above), STMTS also contains S3
775 (inserted before S5).
20f06221
DN
776
777 Output:
778
779 * TYPE_IN: The type of the input arguments to the pattern.
780
383d9c83 781 * TYPE_OUT: The type of the output of this pattern.
20f06221
DN
782
783 * Return value: A new stmt that will be used to replace the sequence of
383d9c83 784 stmts that constitute the pattern. In this case it will be:
20f06221 785 WIDEN_MULT <a_t, b_t>
d367387c
CH
786 If the result of WIDEN_MULT needs to be converted to a larger type, the
787 returned stmt will be this type conversion stmt.
20f06221
DN
788*/
789
355fe088
TS
790static gimple *
791vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
51312233 792 tree *type_in, tree *type_out)
20f06221 793{
355fe088
TS
794 gimple *last_stmt = stmts->pop ();
795 gimple *def_stmt0, *def_stmt1;
89d67cca
DN
796 tree oprnd0, oprnd1;
797 tree type, half_type0, half_type1;
355fe088 798 gimple *new_stmt = NULL, *pattern_stmt = NULL;
d367387c 799 tree vectype, vecitype;
726a989a 800 tree var;
89d67cca 801 enum tree_code dummy_code;
5d593372 802 int dummy_int;
9771b263 803 vec<tree> dummy_vec;
36ba4aae 804 bool op1_ok;
bc4fb355 805 bool promotion;
89d67cca 806
51312233 807 if (!is_gimple_assign (last_stmt))
89d67cca
DN
808 return NULL;
809
51312233 810 type = gimple_expr_type (last_stmt);
89d67cca
DN
811
812 /* Starting from LAST_STMT, follow the defs of its uses in search
813 of the above pattern. */
814
51312233 815 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
89d67cca
DN
816 return NULL;
817
51312233
IR
818 oprnd0 = gimple_assign_rhs1 (last_stmt);
819 oprnd1 = gimple_assign_rhs2 (last_stmt);
9600efe1
MM
820 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
821 || !types_compatible_p (TREE_TYPE (oprnd1), type))
89d67cca
DN
822 return NULL;
823
383d9c83 824 /* Check argument 0. */
bc4fb355
IR
825 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
826 &promotion)
827 || !promotion)
828 return NULL;
383d9c83 829 /* Check argument 1. */
bc4fb355
IR
830 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
831 &def_stmt1, &promotion);
89d67cca 832
bc4fb355 833 if (op1_ok && promotion)
383d9c83
IR
834 {
835 oprnd0 = gimple_assign_rhs1 (def_stmt0);
836 oprnd1 = gimple_assign_rhs1 (def_stmt1);
837 }
36ba4aae 838 else
383d9c83 839 {
51312233 840 if (TREE_CODE (oprnd1) == INTEGER_CST
383d9c83 841 && TREE_CODE (half_type0) == INTEGER_TYPE
36ba4aae 842 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
566d377a 843 &oprnd0, &new_stmt, type,
36ba4aae 844 &half_type0, def_stmt0))
bfdeda2c
JJ
845 {
846 half_type1 = half_type0;
847 oprnd1 = fold_convert (half_type1, oprnd1);
848 }
383d9c83
IR
849 else
850 return NULL;
851 }
852
d367387c
CH
853 /* If the two arguments have different sizes, convert the one with
854 the smaller type into the larger type. */
855 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
856 {
566d377a
RB
857 /* If we already used up the single-stmt slot give up. */
858 if (new_stmt)
859 return NULL;
860
d367387c 861 tree* oprnd = NULL;
355fe088 862 gimple *def_stmt = NULL;
d367387c
CH
863
864 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
865 {
866 def_stmt = def_stmt0;
867 half_type0 = half_type1;
868 oprnd = &oprnd0;
869 }
870 else
871 {
872 def_stmt = def_stmt1;
873 half_type1 = half_type0;
874 oprnd = &oprnd1;
875 }
876
877 tree old_oprnd = gimple_assign_rhs1 (def_stmt);
b731b390 878 tree new_oprnd = make_ssa_name (half_type0);
0d0e4a03 879 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
d367387c
CH
880 *oprnd = new_oprnd;
881 }
882
383d9c83
IR
883 /* Handle unsigned case. Look for
884 S6 u_prod_T = (unsigned TYPE) prod_T;
885 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
886 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
887 {
355fe088 888 gimple *use_stmt;
9a7a4398 889 tree use_lhs;
383d9c83
IR
890 tree use_type;
891
892 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
893 return NULL;
894
9a7a4398
UW
895 use_stmt = vect_single_imm_use (last_stmt);
896 if (!use_stmt || !is_gimple_assign (use_stmt)
625a9766 897 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
383d9c83
IR
898 return NULL;
899
900 use_lhs = gimple_assign_lhs (use_stmt);
901 use_type = TREE_TYPE (use_lhs);
902 if (!INTEGRAL_TYPE_P (use_type)
903 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
904 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
905 return NULL;
906
907 type = use_type;
51312233 908 last_stmt = use_stmt;
383d9c83 909 }
89d67cca 910
9600efe1 911 if (!types_compatible_p (half_type0, half_type1))
89d67cca
DN
912 return NULL;
913
d367387c
CH
914 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
915 to get an intermediate result of type ITYPE. In this case we need
916 to build a statement to convert this intermediate result to type TYPE. */
917 tree itype = type;
918 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
919 itype = build_nonstandard_integer_type
920 (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
921 TYPE_UNSIGNED (type));
922
89d67cca 923 /* Pattern detected. */
73fbfcad 924 if (dump_enabled_p ())
ccb3ad87 925 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 926 "vect_recog_widen_mult_pattern: detected:\n");
89d67cca
DN
927
928 /* Check target support */
929 vectype = get_vectype_for_scalar_type (half_type0);
d367387c 930 vecitype = get_vectype_for_scalar_type (itype);
03d3e953 931 if (!vectype
d367387c 932 || !vecitype
51312233 933 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
d367387c 934 vecitype, vectype,
a86ec597
RH
935 &dummy_code, &dummy_code,
936 &dummy_int, &dummy_vec))
89d67cca
DN
937 return NULL;
938
939 *type_in = vectype;
d367387c 940 *type_out = get_vectype_for_scalar_type (type);
89d67cca
DN
941
942 /* Pattern supported. Create a stmt to be used to replace the pattern: */
d367387c 943 var = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 944 pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);
726a989a 945
d367387c 946 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
d367387c
CH
947 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
948
949 /* If the original two operands have different sizes, we may need to convert
950 the smaller one into the larget type. If this is the case, at this point
951 the new stmt is already built. */
952 if (new_stmt)
953 {
954 append_pattern_def_seq (stmt_vinfo, new_stmt);
955 stmt_vec_info new_stmt_info
310213d4 956 = new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
d367387c
CH
957 set_vinfo_for_stmt (new_stmt, new_stmt_info);
958 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
959 }
960
961 /* If ITYPE is not TYPE, we need to build a type convertion stmt to convert
962 the result of the widen-mult operation into type TYPE. */
963 if (itype != type)
964 {
965 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
966 stmt_vec_info pattern_stmt_info
310213d4 967 = new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
d367387c
CH
968 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
969 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
0d0e4a03
JJ
970 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
971 NOP_EXPR,
972 gimple_assign_lhs (pattern_stmt));
d367387c
CH
973 }
974
73fbfcad 975 if (dump_enabled_p ())
78c60e3d 976 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
726a989a 977
9771b263 978 stmts->safe_push (last_stmt);
726a989a 979 return pattern_stmt;
20f06221
DN
980}
981
982
0b2229b0
RG
983/* Function vect_recog_pow_pattern
984
985 Try to find the following pattern:
986
987 x = POW (y, N);
988
989 with POW being one of pow, powf, powi, powif and N being
990 either 2 or 0.5.
991
992 Input:
993
994 * LAST_STMT: A stmt from which the pattern search begins.
995
996 Output:
997
998 * TYPE_IN: The type of the input arguments to the pattern.
999
1000 * TYPE_OUT: The type of the output of this pattern.
1001
1002 * Return value: A new stmt that will be used to replace the sequence of
1003 stmts that constitute the pattern. In this case it will be:
726a989a 1004 x = x * x
0b2229b0 1005 or
726a989a 1006 x = sqrt (x)
0b2229b0
RG
1007*/
1008
355fe088
TS
1009static gimple *
1010vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
51312233 1011 tree *type_out)
0b2229b0 1012{
355fe088 1013 gimple *last_stmt = (*stmts)[0];
7a31e5ef 1014 tree base, exp = NULL;
355fe088 1015 gimple *stmt;
726a989a 1016 tree var;
0b2229b0 1017
51312233 1018 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
0b2229b0
RG
1019 return NULL;
1020
7a31e5ef 1021 switch (gimple_call_combined_fn (last_stmt))
0b2229b0 1022 {
7a31e5ef
RS
1023 CASE_CFN_POW:
1024 CASE_CFN_POWI:
51312233
IR
1025 base = gimple_call_arg (last_stmt, 0);
1026 exp = gimple_call_arg (last_stmt, 1);
0b2229b0
RG
1027 if (TREE_CODE (exp) != REAL_CST
1028 && TREE_CODE (exp) != INTEGER_CST)
726a989a 1029 return NULL;
0b2229b0
RG
1030 break;
1031
726a989a
RB
1032 default:
1033 return NULL;
0b2229b0
RG
1034 }
1035
1036 /* We now have a pow or powi builtin function call with a constant
1037 exponent. */
1038
0b2229b0
RG
1039 *type_out = NULL_TREE;
1040
1041 /* Catch squaring. */
9541ffee 1042 if ((tree_fits_shwi_p (exp)
9439e9a1 1043 && tree_to_shwi (exp) == 2)
0b2229b0 1044 || (TREE_CODE (exp) == REAL_CST
624d31fe 1045 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
c6b1b49b
RG
1046 {
1047 *type_in = TREE_TYPE (base);
726a989a
RB
1048
1049 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
0d0e4a03 1050 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
726a989a 1051 return stmt;
c6b1b49b 1052 }
0b2229b0
RG
1053
1054 /* Catch square root. */
1055 if (TREE_CODE (exp) == REAL_CST
624d31fe 1056 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
0b2229b0 1057 {
c6b1b49b 1058 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
d95ab70a
RS
1059 if (*type_in
1060 && direct_internal_fn_supported_p (IFN_SQRT, *type_in,
1061 OPTIMIZE_FOR_SPEED))
c6b1b49b 1062 {
b4e5bc47
RS
1063 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1064 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1065 gimple_call_set_lhs (stmt, var);
1066 return stmt;
c6b1b49b 1067 }
0b2229b0
RG
1068 }
1069
726a989a 1070 return NULL;
0b2229b0
RG
1071}
1072
1073
20f06221
DN
1074/* Function vect_recog_widen_sum_pattern
1075
1076 Try to find the following pattern:
1077
b8698a0f 1078 type x_t;
20f06221
DN
1079 TYPE x_T, sum = init;
1080 loop:
1081 sum_0 = phi <init, sum_1>
1082 S1 x_t = *p;
1083 S2 x_T = (TYPE) x_t;
1084 S3 sum_1 = x_T + sum_0;
1085
b8698a0f 1086 where type 'TYPE' is at least double the size of type 'type', i.e - we're
20f06221 1087 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
917f1b7e 1088 a special case of a reduction computation.
20f06221
DN
1089
1090 Input:
1091
1092 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1093 when this function is called with S3, the pattern {S2,S3} will be detected.
b8698a0f 1094
20f06221 1095 Output:
b8698a0f 1096
20f06221
DN
1097 * TYPE_IN: The type of the input arguments to the pattern.
1098
1099 * TYPE_OUT: The type of the output of this pattern.
1100
1101 * Return value: A new stmt that will be used to replace the sequence of
1102 stmts that constitute the pattern. In this case it will be:
1103 WIDEN_SUM <x_t, sum_0>
d29de1bf 1104
b8698a0f 1105 Note: The widening-sum idiom is a widening reduction pattern that is
d29de1bf 1106 vectorized without preserving all the intermediate results. It
b8698a0f
L
1107 produces only N/2 (widened) results (by summing up pairs of
1108 intermediate results) rather than all N results. Therefore, we
1109 cannot allow this pattern when we want to get all the results and in
1110 the correct order (as is the case when this computation is in an
d29de1bf 1111 inner-loop nested in an outer-loop that us being vectorized). */
20f06221 1112
355fe088
TS
1113static gimple *
1114vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
51312233 1115 tree *type_out)
20f06221 1116{
355fe088 1117 gimple *stmt, *last_stmt = (*stmts)[0];
20f06221 1118 tree oprnd0, oprnd1;
51312233 1119 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
20f06221 1120 tree type, half_type;
355fe088 1121 gimple *pattern_stmt;
d29de1bf 1122 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 1123 struct loop *loop;
726a989a 1124 tree var;
bc4fb355 1125 bool promotion;
20f06221 1126
f5709183
IR
1127 if (!loop_info)
1128 return NULL;
1129
1130 loop = LOOP_VINFO_LOOP (loop_info);
1131
328dc477
RB
1132 /* We don't allow changing the order of the computation in the inner-loop
1133 when doing outer-loop vectorization. */
1134 if (loop && nested_in_vect_loop_p (loop, last_stmt))
1135 return NULL;
1136
51312233 1137 if (!is_gimple_assign (last_stmt))
20f06221
DN
1138 return NULL;
1139
51312233 1140 type = gimple_expr_type (last_stmt);
20f06221
DN
1141
1142 /* Look for the following pattern
1143 DX = (TYPE) X;
1144 sum_1 = DX + sum_0;
1145 In which DX is at least double the size of X, and sum_1 has been
1146 recognized as a reduction variable.
1147 */
1148
1149 /* Starting from LAST_STMT, follow the defs of its uses in search
1150 of the above pattern. */
1151
51312233 1152 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
20f06221
DN
1153 return NULL;
1154
51312233
IR
1155 oprnd0 = gimple_assign_rhs1 (last_stmt);
1156 oprnd1 = gimple_assign_rhs2 (last_stmt);
9600efe1
MM
1157 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
1158 || !types_compatible_p (TREE_TYPE (oprnd1), type))
20f06221
DN
1159 return NULL;
1160
51312233 1161 /* So far so good. Since last_stmt was detected as a (summation) reduction,
20f06221
DN
1162 we know that oprnd1 is the reduction variable (defined by a loop-header
1163 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1164 Left to check that oprnd0 is defined by a cast from type 'type' to type
1165 'TYPE'. */
1166
bc4fb355
IR
1167 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1168 &promotion)
1169 || !promotion)
1170 return NULL;
20f06221 1171
726a989a 1172 oprnd0 = gimple_assign_rhs1 (stmt);
20f06221
DN
1173 *type_in = half_type;
1174 *type_out = type;
1175
1176 /* Pattern detected. Create a stmt to be used to replace the pattern: */
726a989a 1177 var = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 1178 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);
726a989a 1179
73fbfcad 1180 if (dump_enabled_p ())
20f06221 1181 {
ccb3ad87 1182 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1183 "vect_recog_widen_sum_pattern: detected: ");
ccb3ad87 1184 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 1185 dump_printf (MSG_NOTE, "\n");
20f06221 1186 }
d29de1bf 1187
726a989a 1188 return pattern_stmt;
20f06221
DN
1189}
1190
1191
1107f3ae
IR
1192/* Return TRUE if the operation in STMT can be performed on a smaller type.
1193
1194 Input:
1195 STMT - a statement to check.
1196 DEF - we support operations with two operands, one of which is constant.
1197 The other operand can be defined by a demotion operation, or by a
1198 previous statement in a sequence of over-promoted operations. In the
1199 later case DEF is used to replace that operand. (It is defined by a
1200 pattern statement we created for the previous statement in the
1201 sequence).
1202
1203 Input/output:
1204 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1205 NULL, it's the type of DEF.
1206 STMTS - additional pattern statements. If a pattern statement (type
1207 conversion) is created in this function, its original statement is
1208 added to STMTS.
1209
1210 Output:
1211 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1212 operands to use in the new pattern statement for STMT (will be created
1213 in vect_recog_over_widening_pattern ()).
1214 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1215 statements for STMT: the first one is a type promotion and the second
1216 one is the operation itself. We return the type promotion statement
363477c0 1217 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1107f3ae
IR
1218 the second pattern statement. */
1219
1220static bool
355fe088
TS
1221vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
1222 tree *op0, tree *op1, gimple **new_def_stmt,
1223 vec<gimple *> *stmts)
1107f3ae
IR
1224{
1225 enum tree_code code;
1226 tree const_oprnd, oprnd;
83d5977e 1227 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
355fe088 1228 gimple *def_stmt, *new_stmt;
1107f3ae 1229 bool first = false;
bc4fb355 1230 bool promotion;
f5709183 1231
d6e1acf6
JJ
1232 *op0 = NULL_TREE;
1233 *op1 = NULL_TREE;
1107f3ae
IR
1234 *new_def_stmt = NULL;
1235
1236 if (!is_gimple_assign (stmt))
1237 return false;
1238
1239 code = gimple_assign_rhs_code (stmt);
1240 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1241 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1242 return false;
1243
1244 oprnd = gimple_assign_rhs1 (stmt);
1245 const_oprnd = gimple_assign_rhs2 (stmt);
1246 type = gimple_expr_type (stmt);
1247
1248 if (TREE_CODE (oprnd) != SSA_NAME
1249 || TREE_CODE (const_oprnd) != INTEGER_CST)
1250 return false;
1251
9ef7adc0
RG
1252 /* If oprnd has other uses besides that in stmt we cannot mark it
1253 as being part of a pattern only. */
1254 if (!has_single_use (oprnd))
1255 return false;
1256
1107f3ae
IR
1257 /* If we are in the middle of a sequence, we use DEF from a previous
1258 statement. Otherwise, OPRND has to be a result of type promotion. */
1259 if (*new_type)
1260 {
1261 half_type = *new_type;
1262 oprnd = def;
1263 }
1264 else
1265 {
1266 first = true;
bc4fb355 1267 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
f71cf56a
UW
1268 &promotion)
1269 || !promotion
1270 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1107f3ae
IR
1271 return false;
1272 }
1273
1274 /* Can we perform the operation on a smaller type? */
1275 switch (code)
1276 {
1277 case BIT_IOR_EXPR:
1278 case BIT_XOR_EXPR:
1279 case BIT_AND_EXPR:
1280 if (!int_fits_type_p (const_oprnd, half_type))
1281 {
1282 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1283 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1284 return false;
1285
1286 interm_type = build_nonstandard_integer_type (
1287 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1288 if (!int_fits_type_p (const_oprnd, interm_type))
1289 return false;
1290 }
1291
1292 break;
1293
1294 case LSHIFT_EXPR:
1295 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1296 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1297 return false;
1298
1299 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1300 (e.g., if the original value was char, the shift amount is at most 8
1301 if we want to use short). */
1302 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1303 return false;
1304
1305 interm_type = build_nonstandard_integer_type (
1306 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1307
1308 if (!vect_supportable_shift (code, interm_type))
1309 return false;
1310
1311 break;
1312
1313 case RSHIFT_EXPR:
1314 if (vect_supportable_shift (code, half_type))
1315 break;
1316
1317 /* Try intermediate type - HALF_TYPE is not supported. */
1318 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1319 return false;
1320
1321 interm_type = build_nonstandard_integer_type (
1322 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1323
1324 if (!vect_supportable_shift (code, interm_type))
1325 return false;
1326
1327 break;
1328
1329 default:
1330 gcc_unreachable ();
1331 }
1332
1333 /* There are four possible cases:
1334 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1335 the first statement in the sequence)
1336 a. The original, HALF_TYPE, is not enough - we replace the promotion
1337 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1338 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1339 promotion.
1340 2. OPRND is defined by a pattern statement we created.
1341 a. Its type is not sufficient for the operation, we create a new stmt:
1342 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1343 this statement in NEW_DEF_STMT, and it is later put in
363477c0 1344 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1107f3ae
IR
1345 b. OPRND is good to use in the new statement. */
1346 if (first)
1347 {
1348 if (interm_type)
1349 {
1350 /* Replace the original type conversion HALF_TYPE->TYPE with
1351 HALF_TYPE->INTERM_TYPE. */
1352 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1353 {
1354 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1355 /* Check if the already created pattern stmt is what we need. */
1356 if (!is_gimple_assign (new_stmt)
625a9766 1357 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
1107f3ae
IR
1358 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1359 return false;
1360
9771b263 1361 stmts->safe_push (def_stmt);
1107f3ae
IR
1362 oprnd = gimple_assign_lhs (new_stmt);
1363 }
1364 else
1365 {
1366 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1367 oprnd = gimple_assign_rhs1 (def_stmt);
b731b390 1368 new_oprnd = make_ssa_name (interm_type);
0d0e4a03 1369 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1107f3ae 1370 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
9771b263 1371 stmts->safe_push (def_stmt);
1107f3ae
IR
1372 oprnd = new_oprnd;
1373 }
1374 }
1375 else
1376 {
1377 /* Retrieve the operand before the type promotion. */
1378 oprnd = gimple_assign_rhs1 (def_stmt);
1379 }
1380 }
1381 else
1382 {
1383 if (interm_type)
1384 {
1385 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
b731b390 1386 new_oprnd = make_ssa_name (interm_type);
0d0e4a03 1387 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1107f3ae
IR
1388 oprnd = new_oprnd;
1389 *new_def_stmt = new_stmt;
1390 }
1391
1392 /* Otherwise, OPRND is already set. */
1393 }
1394
1395 if (interm_type)
1396 *new_type = interm_type;
1397 else
1398 *new_type = half_type;
1399
1400 *op0 = oprnd;
1401 *op1 = fold_convert (*new_type, const_oprnd);
1402
1403 return true;
1404}
1405
1406
1407/* Try to find a statement or a sequence of statements that can be performed
1408 on a smaller type:
1409
1410 type x_t;
1411 TYPE x_T, res0_T, res1_T;
1412 loop:
1413 S1 x_t = *p;
1414 S2 x_T = (TYPE) x_t;
1415 S3 res0_T = op (x_T, C0);
1416 S4 res1_T = op (res0_T, C1);
1417 S5 ... = () res1_T; - type demotion
1418
1419 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1420 constants.
1421 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1422 be 'type' or some intermediate type. For now, we expect S5 to be a type
71c92d17 1423 demotion operation. We also check that S3 and S4 have only one use. */
1107f3ae 1424
355fe088
TS
1425static gimple *
1426vect_recog_over_widening_pattern (vec<gimple *> *stmts,
1107f3ae
IR
1427 tree *type_in, tree *type_out)
1428{
355fe088
TS
1429 gimple *stmt = stmts->pop ();
1430 gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
1431 *use_stmt = NULL;
9a7a4398 1432 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
83d5977e 1433 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1107f3ae 1434 bool first;
b2a1a74d 1435 tree type = NULL;
1107f3ae
IR
1436
1437 first = true;
1438 while (1)
1439 {
1440 if (!vinfo_for_stmt (stmt)
1441 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1442 return NULL;
1443
1444 new_def_stmt = NULL;
1445 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1446 &op0, &op1, &new_def_stmt,
1447 stmts))
1448 {
1449 if (first)
1450 return NULL;
1451 else
1452 break;
1453 }
1454
1455 /* STMT can be performed on a smaller type. Check its uses. */
9a7a4398
UW
1456 use_stmt = vect_single_imm_use (stmt);
1457 if (!use_stmt || !is_gimple_assign (use_stmt))
1107f3ae
IR
1458 return NULL;
1459
1460 /* Create pattern statement for STMT. */
1461 vectype = get_vectype_for_scalar_type (new_type);
1462 if (!vectype)
1463 return NULL;
1464
1465 /* We want to collect all the statements for which we create pattern
1466 statetments, except for the case when the last statement in the
1467 sequence doesn't have a corresponding pattern statement. In such
1468 case we associate the last pattern statement with the last statement
36ba4aae 1469 in the sequence. Therefore, we only add the original statement to
1107f3ae
IR
1470 the list if we know that it is not the last. */
1471 if (prev_stmt)
9771b263 1472 stmts->safe_push (prev_stmt);
1107f3ae
IR
1473
1474 var = vect_recog_temp_ssa_var (new_type, NULL);
62371b92 1475 pattern_stmt
0d0e4a03 1476 = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1107f3ae 1477 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
083481d8 1478 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1107f3ae 1479
73fbfcad 1480 if (dump_enabled_p ())
1107f3ae 1481 {
ccb3ad87 1482 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1483 "created pattern stmt: ");
ccb3ad87 1484 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 1485 dump_printf (MSG_NOTE, "\n");
1107f3ae
IR
1486 }
1487
b2a1a74d 1488 type = gimple_expr_type (stmt);
1107f3ae
IR
1489 prev_stmt = stmt;
1490 stmt = use_stmt;
1491
1492 first = false;
1493 }
1494
1495 /* We got a sequence. We expect it to end with a type demotion operation.
1496 Otherwise, we quit (for now). There are three possible cases: the
1497 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1498 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1499 NEW_TYPE differs (we create a new conversion statement). */
1500 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1501 {
1502 use_lhs = gimple_assign_lhs (use_stmt);
1503 use_type = TREE_TYPE (use_lhs);
82db3d43 1504 /* Support only type demotion or signedess change. */
1107f3ae 1505 if (!INTEGRAL_TYPE_P (use_type)
82db3d43 1506 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1107f3ae
IR
1507 return NULL;
1508
82db3d43
IR
1509 /* Check that NEW_TYPE is not bigger than the conversion result. */
1510 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1511 return NULL;
1512
1107f3ae
IR
1513 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1514 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1515 {
1516 /* Create NEW_TYPE->USE_TYPE conversion. */
b731b390 1517 new_oprnd = make_ssa_name (use_type);
0d0e4a03 1518 pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1107f3ae
IR
1519 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1520
1521 *type_in = get_vectype_for_scalar_type (new_type);
1522 *type_out = get_vectype_for_scalar_type (use_type);
1523
1524 /* We created a pattern statement for the last statement in the
1525 sequence, so we don't need to associate it with the pattern
1526 statement created for PREV_STMT. Therefore, we add PREV_STMT
1527 to the list in order to mark it later in vect_pattern_recog_1. */
1528 if (prev_stmt)
9771b263 1529 stmts->safe_push (prev_stmt);
1107f3ae
IR
1530 }
1531 else
1532 {
1533 if (prev_stmt)
363477c0
JJ
1534 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1535 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1107f3ae
IR
1536
1537 *type_in = vectype;
1538 *type_out = NULL_TREE;
1539 }
1540
9771b263 1541 stmts->safe_push (use_stmt);
1107f3ae
IR
1542 }
1543 else
1544 /* TODO: support general case, create a conversion to the correct type. */
1545 return NULL;
1546
1547 /* Pattern detected. */
73fbfcad 1548 if (dump_enabled_p ())
1107f3ae 1549 {
ccb3ad87 1550 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1551 "vect_recog_over_widening_pattern: detected: ");
ccb3ad87 1552 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 1553 dump_printf (MSG_NOTE, "\n");
1107f3ae
IR
1554 }
1555
1556 return pattern_stmt;
1557}
1558
36ba4aae
IR
1559/* Detect widening shift pattern:
1560
1561 type a_t;
1562 TYPE a_T, res_T;
1563
1564 S1 a_t = ;
1565 S2 a_T = (TYPE) a_t;
1566 S3 res_T = a_T << CONST;
1567
1568 where type 'TYPE' is at least double the size of type 'type'.
1569
33018845
UW
1570 Also detect cases where the shift result is immediately converted
1571 to another type 'result_type' that is no larger in size than 'TYPE'.
1572 In those cases we perform a widen-shift that directly results in
1573 'result_type', to avoid a possible over-widening situation:
36ba4aae 1574
33018845 1575 type a_t;
36ba4aae 1576 TYPE a_T, res_T;
33018845 1577 result_type res_result;
36ba4aae
IR
1578
1579 S1 a_t = ;
1580 S2 a_T = (TYPE) a_t;
1581 S3 res_T = a_T << CONST;
33018845
UW
1582 S4 res_result = (result_type) res_T;
1583 '--> res_result' = a_t w<< CONST;
36ba4aae
IR
1584
1585 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1586 create an additional pattern stmt for S2 to create a variable of an
1587 intermediate type, and perform widen-shift on the intermediate type:
1588
1589 type a_t;
1590 interm_type a_it;
1591 TYPE a_T, res_T, res_T';
1592
1593 S1 a_t = ;
1594 S2 a_T = (TYPE) a_t;
1595 '--> a_it = (interm_type) a_t;
1596 S3 res_T = a_T << CONST;
1597 '--> res_T' = a_it <<* CONST;
1598
1599 Input/Output:
1600
1601 * STMTS: Contains a stmt from which the pattern search begins.
1602 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1603 in STMTS. When an intermediate type is used and a pattern statement is
1604 created for S2, we also put S2 here (before S3).
1605
1606 Output:
1607
1608 * TYPE_IN: The type of the input arguments to the pattern.
1609
1610 * TYPE_OUT: The type of the output of this pattern.
1611
1612 * Return value: A new stmt that will be used to replace the sequence of
1613 stmts that constitute the pattern. In this case it will be:
1614 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1615
355fe088
TS
1616static gimple *
1617vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
36ba4aae
IR
1618 tree *type_in, tree *type_out)
1619{
355fe088
TS
1620 gimple *last_stmt = stmts->pop ();
1621 gimple *def_stmt0;
36ba4aae
IR
1622 tree oprnd0, oprnd1;
1623 tree type, half_type0;
355fe088 1624 gimple *pattern_stmt;
36ba4aae 1625 tree vectype, vectype_out = NULL_TREE;
36ba4aae
IR
1626 tree var;
1627 enum tree_code dummy_code;
1628 int dummy_int;
9771b263 1629 vec<tree> dummy_vec;
355fe088 1630 gimple *use_stmt;
bc4fb355 1631 bool promotion;
36ba4aae
IR
1632
1633 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1634 return NULL;
1635
36ba4aae 1636 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
33018845 1637 return NULL;
36ba4aae
IR
1638
1639 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1640 return NULL;
1641
1642 oprnd0 = gimple_assign_rhs1 (last_stmt);
1643 oprnd1 = gimple_assign_rhs2 (last_stmt);
1644 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1645 return NULL;
1646
1647 /* Check operand 0: it has to be defined by a type promotion. */
bc4fb355 1648 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
566d377a 1649 &promotion)
bc4fb355
IR
1650 || !promotion)
1651 return NULL;
36ba4aae
IR
1652
1653 /* Check operand 1: has to be positive. We check that it fits the type
1654 in vect_handle_widen_op_by_const (). */
1655 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1656 return NULL;
1657
1658 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1659 type = gimple_expr_type (last_stmt);
1660
33018845
UW
1661 /* Check for subsequent conversion to another type. */
1662 use_stmt = vect_single_imm_use (last_stmt);
1663 if (use_stmt && is_gimple_assign (use_stmt)
1664 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1665 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1666 {
1667 tree use_lhs = gimple_assign_lhs (use_stmt);
1668 tree use_type = TREE_TYPE (use_lhs);
1669
1670 if (INTEGRAL_TYPE_P (use_type)
1671 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1672 {
1673 last_stmt = use_stmt;
1674 type = use_type;
1675 }
1676 }
1677
36ba4aae 1678 /* Check if this a widening operation. */
355fe088 1679 gimple *wstmt = NULL;
36ba4aae 1680 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
566d377a 1681 &oprnd0, &wstmt,
36ba4aae
IR
1682 type, &half_type0, def_stmt0))
1683 return NULL;
1684
36ba4aae 1685 /* Pattern detected. */
73fbfcad 1686 if (dump_enabled_p ())
ccb3ad87 1687 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1688 "vect_recog_widen_shift_pattern: detected:\n");
36ba4aae
IR
1689
1690 /* Check target support. */
1691 vectype = get_vectype_for_scalar_type (half_type0);
1692 vectype_out = get_vectype_for_scalar_type (type);
1693
1694 if (!vectype
1695 || !vectype_out
1696 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1697 vectype_out, vectype,
a86ec597
RH
1698 &dummy_code, &dummy_code,
1699 &dummy_int, &dummy_vec))
36ba4aae
IR
1700 return NULL;
1701
1702 *type_in = vectype;
1703 *type_out = vectype_out;
1704
1705 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1706 var = vect_recog_temp_ssa_var (type, NULL);
1707 pattern_stmt =
0d0e4a03 1708 gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
566d377a
RB
1709 if (wstmt)
1710 {
1711 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
566d377a
RB
1712 new_pattern_def_seq (stmt_vinfo, wstmt);
1713 stmt_vec_info new_stmt_info
310213d4 1714 = new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
566d377a
RB
1715 set_vinfo_for_stmt (wstmt, new_stmt_info);
1716 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1717 }
36ba4aae 1718
73fbfcad 1719 if (dump_enabled_p ())
78c60e3d 1720 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
7e9a3abb
JJ
1721
1722 stmts->safe_push (last_stmt);
1723 return pattern_stmt;
1724}
1725
1726/* Detect a rotate pattern wouldn't be otherwise vectorized:
1727
1728 type a_t, b_t, c_t;
1729
1730 S0 a_t = b_t r<< c_t;
1731
1732 Input/Output:
1733
1734 * STMTS: Contains a stmt from which the pattern search begins,
1735 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1736 with a sequence:
1737
1738 S1 d_t = -c_t;
1739 S2 e_t = d_t & (B - 1);
1740 S3 f_t = b_t << c_t;
1741 S4 g_t = b_t >> e_t;
1742 S0 a_t = f_t | g_t;
1743
1744 where B is element bitsize of type.
1745
1746 Output:
1747
1748 * TYPE_IN: The type of the input arguments to the pattern.
1749
1750 * TYPE_OUT: The type of the output of this pattern.
1751
1752 * Return value: A new stmt that will be used to replace the rotate
1753 S0 stmt. */
1754
355fe088
TS
1755static gimple *
1756vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
7e9a3abb 1757{
355fe088 1758 gimple *last_stmt = stmts->pop ();
7e9a3abb 1759 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
355fe088 1760 gimple *pattern_stmt, *def_stmt;
7e9a3abb
JJ
1761 enum tree_code rhs_code;
1762 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
310213d4 1763 vec_info *vinfo = stmt_vinfo->vinfo;
7e9a3abb
JJ
1764 enum vect_def_type dt;
1765 optab optab1, optab2;
68119618 1766 edge ext_def = NULL;
7e9a3abb
JJ
1767
1768 if (!is_gimple_assign (last_stmt))
1769 return NULL;
1770
1771 rhs_code = gimple_assign_rhs_code (last_stmt);
1772 switch (rhs_code)
1773 {
1774 case LROTATE_EXPR:
1775 case RROTATE_EXPR:
1776 break;
1777 default:
1778 return NULL;
1779 }
1780
1781 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1782 return NULL;
1783
1784 lhs = gimple_assign_lhs (last_stmt);
1785 oprnd0 = gimple_assign_rhs1 (last_stmt);
1786 type = TREE_TYPE (oprnd0);
1787 oprnd1 = gimple_assign_rhs2 (last_stmt);
1788 if (TREE_CODE (oprnd0) != SSA_NAME
1789 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1790 || !INTEGRAL_TYPE_P (type)
1791 || !TYPE_UNSIGNED (type))
1792 return NULL;
1793
81c40241 1794 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
7e9a3abb
JJ
1795 return NULL;
1796
1797 if (dt != vect_internal_def
1798 && dt != vect_constant_def
1799 && dt != vect_external_def)
1800 return NULL;
1801
1802 vectype = get_vectype_for_scalar_type (type);
1803 if (vectype == NULL_TREE)
1804 return NULL;
1805
1806 /* If vector/vector or vector/scalar rotate is supported by the target,
1807 don't do anything here. */
1808 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1809 if (optab1
1810 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1811 return NULL;
1812
310213d4 1813 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
7e9a3abb
JJ
1814 {
1815 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1816 if (optab2
1817 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1818 return NULL;
1819 }
1820
1821 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1822 don't do anything here either. */
1823 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1824 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1825 if (!optab1
1826 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1827 || !optab2
1828 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1829 {
310213d4 1830 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
7e9a3abb
JJ
1831 return NULL;
1832 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1833 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1834 if (!optab1
1835 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1836 || !optab2
1837 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1838 return NULL;
1839 }
1840
1841 *type_in = vectype;
1842 *type_out = vectype;
1843 if (*type_in == NULL_TREE)
1844 return NULL;
1845
68119618
JJ
1846 if (dt == vect_external_def
1847 && TREE_CODE (oprnd1) == SSA_NAME
310213d4 1848 && is_a <loop_vec_info> (vinfo))
68119618 1849 {
310213d4 1850 struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
68119618
JJ
1851 ext_def = loop_preheader_edge (loop);
1852 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1853 {
1854 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1855 if (bb == NULL
1856 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1857 ext_def = NULL;
1858 }
1859 }
1860
7e9a3abb
JJ
1861 def = NULL_TREE;
1862 if (TREE_CODE (oprnd1) == INTEGER_CST
1863 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1864 def = oprnd1;
1865 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1866 {
1867 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1868 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1869 && TYPE_PRECISION (TREE_TYPE (rhs1))
1870 == TYPE_PRECISION (type))
1871 def = rhs1;
1872 }
1873
1874 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1875 if (def == NULL_TREE)
1876 {
1877 def = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 1878 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
68119618
JJ
1879 if (ext_def)
1880 {
1881 basic_block new_bb
1882 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1883 gcc_assert (!new_bb);
1884 }
1885 else
1886 append_pattern_def_seq (stmt_vinfo, def_stmt);
7e9a3abb
JJ
1887 }
1888 stype = TREE_TYPE (def);
1889
1890 if (TREE_CODE (def) == INTEGER_CST)
1891 {
cc269bb6 1892 if (!tree_fits_uhwi_p (def)
7d362f6c 1893 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
7e9a3abb
JJ
1894 || integer_zerop (def))
1895 return NULL;
1896 def2 = build_int_cst (stype,
1897 GET_MODE_PRECISION (TYPE_MODE (type))
ae7e9ddd 1898 - tree_to_uhwi (def));
7e9a3abb
JJ
1899 }
1900 else
1901 {
1902 tree vecstype = get_vectype_for_scalar_type (stype);
1903 stmt_vec_info def_stmt_vinfo;
1904
1905 if (vecstype == NULL_TREE)
1906 return NULL;
1907 def2 = vect_recog_temp_ssa_var (stype, NULL);
0d0e4a03 1908 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
68119618
JJ
1909 if (ext_def)
1910 {
1911 basic_block new_bb
1912 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1913 gcc_assert (!new_bb);
1914 }
1915 else
1916 {
310213d4 1917 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
68119618
JJ
1918 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1919 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1920 append_pattern_def_seq (stmt_vinfo, def_stmt);
1921 }
7e9a3abb
JJ
1922
1923 def2 = vect_recog_temp_ssa_var (stype, NULL);
1924 tree mask
1925 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
0d0e4a03
JJ
1926 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
1927 gimple_assign_lhs (def_stmt), mask);
68119618
JJ
1928 if (ext_def)
1929 {
1930 basic_block new_bb
1931 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1932 gcc_assert (!new_bb);
1933 }
1934 else
1935 {
310213d4 1936 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
68119618
JJ
1937 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1938 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1939 append_pattern_def_seq (stmt_vinfo, def_stmt);
1940 }
7e9a3abb
JJ
1941 }
1942
1943 var1 = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
1944 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
1945 ? LSHIFT_EXPR : RSHIFT_EXPR,
1946 oprnd0, def);
7e9a3abb
JJ
1947 append_pattern_def_seq (stmt_vinfo, def_stmt);
1948
1949 var2 = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
1950 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
1951 ? RSHIFT_EXPR : LSHIFT_EXPR,
1952 oprnd0, def2);
7e9a3abb
JJ
1953 append_pattern_def_seq (stmt_vinfo, def_stmt);
1954
1955 /* Pattern detected. */
1956 if (dump_enabled_p ())
1957 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1958 "vect_recog_rotate_pattern: detected:\n");
7e9a3abb
JJ
1959
1960 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1961 var = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 1962 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
7e9a3abb
JJ
1963
1964 if (dump_enabled_p ())
1965 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
36ba4aae 1966
9771b263 1967 stmts->safe_push (last_stmt);
36ba4aae
IR
1968 return pattern_stmt;
1969}
1107f3ae 1970
732a0ad3
JJ
1971/* Detect a vector by vector shift pattern that wouldn't be otherwise
1972 vectorized:
1973
1974 type a_t;
1975 TYPE b_T, res_T;
1976
1977 S1 a_t = ;
1978 S2 b_T = ;
1979 S3 res_T = b_T op a_t;
1980
1981 where type 'TYPE' is a type with different size than 'type',
1982 and op is <<, >> or rotate.
1983
1984 Also detect cases:
1985
1986 type a_t;
1987 TYPE b_T, c_T, res_T;
1988
1989 S0 c_T = ;
1990 S1 a_t = (type) c_T;
1991 S2 b_T = ;
1992 S3 res_T = b_T op a_t;
1993
1994 Input/Output:
1995
1996 * STMTS: Contains a stmt from which the pattern search begins,
1997 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
1998 with a shift/rotate which has same type on both operands, in the
1999 second case just b_T op c_T, in the first case with added cast
363477c0 2000 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
732a0ad3
JJ
2001
2002 Output:
2003
2004 * TYPE_IN: The type of the input arguments to the pattern.
2005
2006 * TYPE_OUT: The type of the output of this pattern.
2007
2008 * Return value: A new stmt that will be used to replace the shift/rotate
2009 S3 stmt. */
2010
355fe088
TS
2011static gimple *
2012vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
732a0ad3
JJ
2013 tree *type_in, tree *type_out)
2014{
355fe088 2015 gimple *last_stmt = stmts->pop ();
732a0ad3 2016 tree oprnd0, oprnd1, lhs, var;
355fe088 2017 gimple *pattern_stmt, *def_stmt;
732a0ad3
JJ
2018 enum tree_code rhs_code;
2019 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
310213d4 2020 vec_info *vinfo = stmt_vinfo->vinfo;
732a0ad3 2021 enum vect_def_type dt;
732a0ad3
JJ
2022
2023 if (!is_gimple_assign (last_stmt))
2024 return NULL;
2025
2026 rhs_code = gimple_assign_rhs_code (last_stmt);
2027 switch (rhs_code)
2028 {
2029 case LSHIFT_EXPR:
2030 case RSHIFT_EXPR:
2031 case LROTATE_EXPR:
2032 case RROTATE_EXPR:
2033 break;
2034 default:
2035 return NULL;
2036 }
2037
2038 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2039 return NULL;
2040
2041 lhs = gimple_assign_lhs (last_stmt);
2042 oprnd0 = gimple_assign_rhs1 (last_stmt);
2043 oprnd1 = gimple_assign_rhs2 (last_stmt);
2044 if (TREE_CODE (oprnd0) != SSA_NAME
2045 || TREE_CODE (oprnd1) != SSA_NAME
2046 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2047 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2048 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2049 || TYPE_PRECISION (TREE_TYPE (lhs))
2050 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2051 return NULL;
2052
81c40241 2053 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
732a0ad3
JJ
2054 return NULL;
2055
2056 if (dt != vect_internal_def)
2057 return NULL;
2058
2059 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2060 *type_out = *type_in;
2061 if (*type_in == NULL_TREE)
2062 return NULL;
2063
81c40241 2064 tree def = NULL_TREE;
732a0ad3
JJ
2065 if (gimple_assign_cast_p (def_stmt))
2066 {
2067 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2068 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2069 && TYPE_PRECISION (TREE_TYPE (rhs1))
2070 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2071 def = rhs1;
2072 }
2073
2074 if (def == NULL_TREE)
2075 {
2076 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
0d0e4a03 2077 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
083481d8 2078 new_pattern_def_seq (stmt_vinfo, def_stmt);
732a0ad3
JJ
2079 }
2080
2081 /* Pattern detected. */
73fbfcad 2082 if (dump_enabled_p ())
ccb3ad87 2083 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2084 "vect_recog_vector_vector_shift_pattern: detected:\n");
732a0ad3
JJ
2085
2086 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2087 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
0d0e4a03 2088 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
732a0ad3 2089
73fbfcad 2090 if (dump_enabled_p ())
78c60e3d 2091 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
732a0ad3 2092
9771b263 2093 stmts->safe_push (last_stmt);
732a0ad3
JJ
2094 return pattern_stmt;
2095}
2096
47486460
VK
2097/* Detect multiplication by constant which are postive or negatives of power 2,
2098 and convert them to shift patterns.
2099
2100 Mult with constants that are postive power of two.
2101 type a_t;
2102 type b_t
2103 S1: b_t = a_t * n
2104
2105 or
2106
2107 Mult with constants that are negative power of two.
2108 S2: b_t = a_t * -n
2109
2110 Input/Output:
2111
2112 STMTS: Contains a stmt from which the pattern search begins,
2113 i.e. the mult stmt. Convert the mult operation to LSHIFT if
2114 constant operand is a power of 2.
2115 type a_t, b_t
2116 S1': b_t = a_t << log2 (n)
2117
2118 Convert the mult operation to LSHIFT and followed by a NEGATE
2119 if constant operand is a negative power of 2.
2120 type a_t, b_t, res_T;
2121 S2': b_t = a_t << log2 (n)
2122 S3': res_T = - (b_t)
2123
2124 Output:
2125
2126 * TYPE_IN: The type of the input arguments to the pattern.
2127
2128 * TYPE_OUT: The type of the output of this pattern.
2129
2130 * Return value: A new stmt that will be used to replace the multiplication
2131 S1 or S2 stmt. */
2132
355fe088
TS
2133static gimple *
2134vect_recog_mult_pattern (vec<gimple *> *stmts,
47486460
VK
2135 tree *type_in, tree *type_out)
2136{
355fe088 2137 gimple *last_stmt = stmts->pop ();
47486460 2138 tree oprnd0, oprnd1, vectype, itype;
355fe088 2139 gimple *pattern_stmt, *def_stmt;
47486460
VK
2140 optab optab;
2141 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2142 int power2_val, power2_neg_val;
2143 tree shift;
2144
2145 if (!is_gimple_assign (last_stmt))
2146 return NULL;
2147
2148 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2149 return NULL;
2150
2151 oprnd0 = gimple_assign_rhs1 (last_stmt);
2152 oprnd1 = gimple_assign_rhs2 (last_stmt);
2153 itype = TREE_TYPE (oprnd0);
2154
2155 if (TREE_CODE (oprnd0) != SSA_NAME
2156 || TREE_CODE (oprnd1) != INTEGER_CST
2157 || !INTEGRAL_TYPE_P (itype)
2158 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2159 return NULL;
2160
2161 vectype = get_vectype_for_scalar_type (itype);
2162 if (vectype == NULL_TREE)
2163 return NULL;
2164
2165 /* If the target can handle vectorized multiplication natively,
2166 don't attempt to optimize this. */
2167 optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2168 if (optab != unknown_optab)
2169 {
2170 machine_mode vec_mode = TYPE_MODE (vectype);
2171 int icode = (int) optab_handler (optab, vec_mode);
2172 if (icode != CODE_FOR_nothing)
2173 return NULL;
2174 }
2175
2176 /* If target cannot handle vector left shift then we cannot
2177 optimize and bail out. */
2178 optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2179 if (!optab
2180 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2181 return NULL;
2182
2183 power2_val = wi::exact_log2 (oprnd1);
2184 power2_neg_val = wi::exact_log2 (wi::neg (oprnd1));
2185
2186 /* Handle constant operands that are postive or negative powers of 2. */
2187 if (power2_val != -1)
2188 {
2189 shift = build_int_cst (itype, power2_val);
2190 pattern_stmt
2191 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2192 LSHIFT_EXPR, oprnd0, shift);
2193 }
2194 else if (power2_neg_val != -1)
2195 {
2196 /* If the target cannot handle vector NEGATE then we cannot
2197 do the optimization. */
2198 optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector);
2199 if (!optab
2200 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2201 return NULL;
2202
2203 shift = build_int_cst (itype, power2_neg_val);
2204 def_stmt
2205 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2206 LSHIFT_EXPR, oprnd0, shift);
2207 new_pattern_def_seq (stmt_vinfo, def_stmt);
2208 pattern_stmt
2209 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2210 NEGATE_EXPR, gimple_assign_lhs (def_stmt));
2211 }
2212 else
2213 return NULL;
2214
2215 /* Pattern detected. */
2216 if (dump_enabled_p ())
2217 dump_printf_loc (MSG_NOTE, vect_location,
2218 "vect_recog_mult_pattern: detected:\n");
2219
2220 if (dump_enabled_p ())
2221 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2222 pattern_stmt,0);
2223
2224 stmts->safe_push (last_stmt);
2225 *type_in = vectype;
2226 *type_out = vectype;
2227
2228 return pattern_stmt;
2229}
2230
079c527f 2231/* Detect a signed division by a constant that wouldn't be
363477c0
JJ
2232 otherwise vectorized:
2233
2234 type a_t, b_t;
2235
2236 S1 a_t = b_t / N;
2237
079c527f 2238 where type 'type' is an integral type and N is a constant.
363477c0 2239
079c527f 2240 Similarly handle modulo by a constant:
363477c0
JJ
2241
2242 S4 a_t = b_t % N;
2243
2244 Input/Output:
2245
2246 * STMTS: Contains a stmt from which the pattern search begins,
079c527f
JJ
2247 i.e. the division stmt. S1 is replaced by if N is a power
2248 of two constant and type is signed:
363477c0
JJ
2249 S3 y_t = b_t < 0 ? N - 1 : 0;
2250 S2 x_t = b_t + y_t;
2251 S1' a_t = x_t >> log2 (N);
2252
079c527f
JJ
2253 S4 is replaced if N is a power of two constant and
2254 type is signed by (where *_T temporaries have unsigned type):
363477c0
JJ
2255 S9 y_T = b_t < 0 ? -1U : 0U;
2256 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2257 S7 z_t = (type) z_T;
2258 S6 w_t = b_t + z_t;
2259 S5 x_t = w_t & (N - 1);
2260 S4' a_t = x_t - z_t;
2261
2262 Output:
2263
2264 * TYPE_IN: The type of the input arguments to the pattern.
2265
2266 * TYPE_OUT: The type of the output of this pattern.
2267
2268 * Return value: A new stmt that will be used to replace the division
2269 S1 or modulo S4 stmt. */
2270
355fe088
TS
2271static gimple *
2272vect_recog_divmod_pattern (vec<gimple *> *stmts,
079c527f 2273 tree *type_in, tree *type_out)
363477c0 2274{
355fe088 2275 gimple *last_stmt = stmts->pop ();
5deb57cb 2276 tree oprnd0, oprnd1, vectype, itype, cond;
355fe088 2277 gimple *pattern_stmt, *def_stmt;
363477c0
JJ
2278 enum tree_code rhs_code;
2279 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
310213d4 2280 vec_info *vinfo = stmt_vinfo->vinfo;
363477c0 2281 optab optab;
00f07b86 2282 tree q;
079c527f 2283 int dummy_int, prec;
079c527f 2284 stmt_vec_info def_stmt_vinfo;
363477c0
JJ
2285
2286 if (!is_gimple_assign (last_stmt))
2287 return NULL;
2288
2289 rhs_code = gimple_assign_rhs_code (last_stmt);
2290 switch (rhs_code)
2291 {
2292 case TRUNC_DIV_EXPR:
2293 case TRUNC_MOD_EXPR:
2294 break;
2295 default:
2296 return NULL;
2297 }
2298
2299 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2300 return NULL;
2301
2302 oprnd0 = gimple_assign_rhs1 (last_stmt);
2303 oprnd1 = gimple_assign_rhs2 (last_stmt);
2304 itype = TREE_TYPE (oprnd0);
2305 if (TREE_CODE (oprnd0) != SSA_NAME
2306 || TREE_CODE (oprnd1) != INTEGER_CST
2307 || TREE_CODE (itype) != INTEGER_TYPE
079c527f 2308 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
363477c0
JJ
2309 return NULL;
2310
2311 vectype = get_vectype_for_scalar_type (itype);
2312 if (vectype == NULL_TREE)
2313 return NULL;
2314
2315 /* If the target can handle vectorized division or modulo natively,
2316 don't attempt to optimize this. */
2317 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2225b9f2 2318 if (optab != unknown_optab)
363477c0 2319 {
ef4bddc2 2320 machine_mode vec_mode = TYPE_MODE (vectype);
363477c0 2321 int icode = (int) optab_handler (optab, vec_mode);
e6d4f8f5 2322 if (icode != CODE_FOR_nothing)
363477c0
JJ
2323 return NULL;
2324 }
2325
079c527f
JJ
2326 prec = TYPE_PRECISION (itype);
2327 if (integer_pow2p (oprnd1))
363477c0 2328 {
079c527f
JJ
2329 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2330 return NULL;
363477c0 2331
079c527f 2332 /* Pattern detected. */
73fbfcad 2333 if (dump_enabled_p ())
ccb3ad87 2334 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2335 "vect_recog_divmod_pattern: detected:\n");
079c527f
JJ
2336
2337 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2338 build_int_cst (itype, 0));
2339 if (rhs_code == TRUNC_DIV_EXPR)
2340 {
2341 tree var = vect_recog_temp_ssa_var (itype, NULL);
2342 tree shift;
2343 def_stmt
0d0e4a03
JJ
2344 = gimple_build_assign (var, COND_EXPR, cond,
2345 fold_build2 (MINUS_EXPR, itype, oprnd1,
2346 build_int_cst (itype, 1)),
2347 build_int_cst (itype, 0));
079c527f
JJ
2348 new_pattern_def_seq (stmt_vinfo, def_stmt);
2349 var = vect_recog_temp_ssa_var (itype, NULL);
2350 def_stmt
0d0e4a03
JJ
2351 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2352 gimple_assign_lhs (def_stmt));
079c527f
JJ
2353 append_pattern_def_seq (stmt_vinfo, def_stmt);
2354
2355 shift = build_int_cst (itype, tree_log2 (oprnd1));
2356 pattern_stmt
0d0e4a03
JJ
2357 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2358 RSHIFT_EXPR, var, shift);
079c527f
JJ
2359 }
2360 else
2361 {
2362 tree signmask;
2363 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2364 if (compare_tree_int (oprnd1, 2) == 0)
2365 {
2366 signmask = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2367 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2368 build_int_cst (itype, 1),
2369 build_int_cst (itype, 0));
079c527f
JJ
2370 append_pattern_def_seq (stmt_vinfo, def_stmt);
2371 }
2372 else
2373 {
2374 tree utype
2375 = build_nonstandard_integer_type (prec, 1);
2376 tree vecutype = get_vectype_for_scalar_type (utype);
2377 tree shift
2378 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2379 - tree_log2 (oprnd1));
2380 tree var = vect_recog_temp_ssa_var (utype, NULL);
2381
0d0e4a03
JJ
2382 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2383 build_int_cst (utype, -1),
2384 build_int_cst (utype, 0));
310213d4 2385 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
079c527f
JJ
2386 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2387 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2388 append_pattern_def_seq (stmt_vinfo, def_stmt);
2389 var = vect_recog_temp_ssa_var (utype, NULL);
0d0e4a03
JJ
2390 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2391 gimple_assign_lhs (def_stmt),
2392 shift);
310213d4 2393 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
079c527f
JJ
2394 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2395 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2396 append_pattern_def_seq (stmt_vinfo, def_stmt);
2397 signmask = vect_recog_temp_ssa_var (itype, NULL);
2398 def_stmt
0d0e4a03 2399 = gimple_build_assign (signmask, NOP_EXPR, var);
079c527f
JJ
2400 append_pattern_def_seq (stmt_vinfo, def_stmt);
2401 }
2402 def_stmt
0d0e4a03
JJ
2403 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2404 PLUS_EXPR, oprnd0, signmask);
079c527f
JJ
2405 append_pattern_def_seq (stmt_vinfo, def_stmt);
2406 def_stmt
0d0e4a03
JJ
2407 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2408 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2409 fold_build2 (MINUS_EXPR, itype, oprnd1,
2410 build_int_cst (itype, 1)));
079c527f
JJ
2411 append_pattern_def_seq (stmt_vinfo, def_stmt);
2412
2413 pattern_stmt
0d0e4a03
JJ
2414 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2415 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2416 signmask);
079c527f
JJ
2417 }
2418
73fbfcad 2419 if (dump_enabled_p ())
78c60e3d
SS
2420 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2421 0);
079c527f 2422
9771b263 2423 stmts->safe_push (last_stmt);
079c527f
JJ
2424
2425 *type_in = vectype;
2426 *type_out = vectype;
2427 return pattern_stmt;
363477c0 2428 }
079c527f 2429
6b58915b
RS
2430 if (prec > HOST_BITS_PER_WIDE_INT
2431 || integer_zerop (oprnd1))
079c527f
JJ
2432 return NULL;
2433
00f07b86
RH
2434 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2435 return NULL;
079c527f
JJ
2436
2437 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2438
2439 if (TYPE_UNSIGNED (itype))
363477c0 2440 {
079c527f
JJ
2441 unsigned HOST_WIDE_INT mh, ml;
2442 int pre_shift, post_shift;
6b58915b
RS
2443 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2444 & GET_MODE_MASK (TYPE_MODE (itype)));
5deb57cb 2445 tree t1, t2, t3, t4;
079c527f
JJ
2446
2447 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2448 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2449 return NULL;
2450
2451 /* Find a suitable multiplier and right shift count
2452 instead of multiplying with D. */
2453 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2454
2455 /* If the suggested multiplier is more than SIZE bits, we can do better
2456 for even divisors, using an initial right shift. */
2457 if (mh != 0 && (d & 1) == 0)
363477c0 2458 {
079c527f
JJ
2459 pre_shift = floor_log2 (d & -d);
2460 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2461 &ml, &post_shift, &dummy_int);
2462 gcc_assert (!mh);
2463 }
2464 else
2465 pre_shift = 0;
2466
2467 if (mh != 0)
2468 {
2469 if (post_shift - 1 >= prec)
2470 return NULL;
2471
5deb57cb
JJ
2472 /* t1 = oprnd0 h* ml;
2473 t2 = oprnd0 - t1;
2474 t3 = t2 >> 1;
2475 t4 = t1 + t3;
2476 q = t4 >> (post_shift - 1); */
2477 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2478 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2479 build_int_cst (itype, ml));
079c527f 2480 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f 2481
5deb57cb 2482 t2 = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2483 def_stmt
0d0e4a03 2484 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
083481d8 2485 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f
JJ
2486
2487 t3 = vect_recog_temp_ssa_var (itype, NULL);
2488 def_stmt
0d0e4a03 2489 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
079c527f
JJ
2490 append_pattern_def_seq (stmt_vinfo, def_stmt);
2491
5deb57cb 2492 t4 = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2493 def_stmt
0d0e4a03 2494 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
079c527f
JJ
2495
2496 if (post_shift != 1)
2497 {
2498 append_pattern_def_seq (stmt_vinfo, def_stmt);
2499
5deb57cb 2500 q = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2501 pattern_stmt
0d0e4a03
JJ
2502 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2503 build_int_cst (itype, post_shift - 1));
079c527f
JJ
2504 }
2505 else
2506 {
5deb57cb 2507 q = t4;
079c527f
JJ
2508 pattern_stmt = def_stmt;
2509 }
363477c0
JJ
2510 }
2511 else
2512 {
079c527f
JJ
2513 if (pre_shift >= prec || post_shift >= prec)
2514 return NULL;
2515
2516 /* t1 = oprnd0 >> pre_shift;
5deb57cb
JJ
2517 t2 = t1 h* ml;
2518 q = t2 >> post_shift; */
079c527f
JJ
2519 if (pre_shift)
2520 {
2521 t1 = vect_recog_temp_ssa_var (itype, NULL);
2522 def_stmt
0d0e4a03
JJ
2523 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2524 build_int_cst (NULL, pre_shift));
079c527f
JJ
2525 append_pattern_def_seq (stmt_vinfo, def_stmt);
2526 }
2527 else
2528 t1 = oprnd0;
363477c0 2529
5deb57cb 2530 t2 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2531 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2532 build_int_cst (itype, ml));
079c527f 2533
5deb57cb
JJ
2534 if (post_shift)
2535 {
2536 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f 2537
5deb57cb
JJ
2538 q = vect_recog_temp_ssa_var (itype, NULL);
2539 def_stmt
0d0e4a03
JJ
2540 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2541 build_int_cst (itype, post_shift));
5deb57cb
JJ
2542 }
2543 else
2544 q = t2;
2545
2546 pattern_stmt = def_stmt;
079c527f
JJ
2547 }
2548 }
2549 else
2550 {
2551 unsigned HOST_WIDE_INT ml;
4ee4c52c 2552 int post_shift;
6b58915b 2553 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
079c527f
JJ
2554 unsigned HOST_WIDE_INT abs_d;
2555 bool add = false;
5deb57cb 2556 tree t1, t2, t3, t4;
079c527f
JJ
2557
2558 /* Give up for -1. */
2559 if (d == -1)
2560 return NULL;
2561
079c527f
JJ
2562 /* Since d might be INT_MIN, we have to cast to
2563 unsigned HOST_WIDE_INT before negating to avoid
2564 undefined signed overflow. */
2565 abs_d = (d >= 0
2566 ? (unsigned HOST_WIDE_INT) d
2567 : - (unsigned HOST_WIDE_INT) d);
2568
2569 /* n rem d = n rem -d */
2570 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2571 {
2572 d = abs_d;
2573 oprnd1 = build_int_cst (itype, abs_d);
2574 }
2575 else if (HOST_BITS_PER_WIDE_INT >= prec
2576 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2577 /* This case is not handled correctly below. */
2578 return NULL;
2579
4ee4c52c 2580 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
079c527f
JJ
2581 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2582 {
2583 add = true;
2584 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2585 }
2586 if (post_shift >= prec)
2587 return NULL;
2588
7abed779 2589 /* t1 = oprnd0 h* ml; */
5deb57cb 2590 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2591 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2592 build_int_cst (itype, ml));
079c527f
JJ
2593
2594 if (add)
2595 {
5deb57cb 2596 /* t2 = t1 + oprnd0; */
7abed779 2597 append_pattern_def_seq (stmt_vinfo, def_stmt);
5deb57cb 2598 t2 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 2599 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
079c527f
JJ
2600 }
2601 else
5deb57cb 2602 t2 = t1;
079c527f 2603
5deb57cb 2604 if (post_shift)
079c527f 2605 {
5deb57cb 2606 /* t3 = t2 >> post_shift; */
7abed779 2607 append_pattern_def_seq (stmt_vinfo, def_stmt);
5deb57cb 2608 t3 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2609 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2610 build_int_cst (itype, post_shift));
363477c0 2611 }
079c527f 2612 else
5deb57cb 2613 t3 = t2;
079c527f 2614
807e902e 2615 wide_int oprnd0_min, oprnd0_max;
7abed779
JJ
2616 int msb = 1;
2617 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2618 {
807e902e 2619 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
7abed779 2620 msb = 0;
807e902e 2621 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
7abed779
JJ
2622 msb = -1;
2623 }
079c527f 2624
7abed779
JJ
2625 if (msb == 0 && d >= 0)
2626 {
2627 /* q = t3; */
2628 q = t3;
2629 pattern_stmt = def_stmt;
2630 }
2631 else
2632 {
2633 /* t4 = oprnd0 >> (prec - 1);
2634 or if we know from VRP that oprnd0 >= 0
2635 t4 = 0;
2636 or if we know from VRP that oprnd0 < 0
2637 t4 = -1; */
2638 append_pattern_def_seq (stmt_vinfo, def_stmt);
2639 t4 = vect_recog_temp_ssa_var (itype, NULL);
2640 if (msb != 1)
0d0e4a03
JJ
2641 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2642 build_int_cst (itype, msb));
7abed779 2643 else
0d0e4a03
JJ
2644 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2645 build_int_cst (itype, prec - 1));
7abed779
JJ
2646 append_pattern_def_seq (stmt_vinfo, def_stmt);
2647
2648 /* q = t3 - t4; or q = t4 - t3; */
2649 q = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2650 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2651 d < 0 ? t3 : t4);
7abed779 2652 }
079c527f
JJ
2653 }
2654
2655 if (rhs_code == TRUNC_MOD_EXPR)
2656 {
2657 tree r, t1;
2658
2659 /* We divided. Now finish by:
2660 t1 = q * oprnd1;
2661 r = oprnd0 - t1; */
2662 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2663
2664 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 2665 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
083481d8 2666 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0 2667
079c527f 2668 r = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 2669 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
363477c0
JJ
2670 }
2671
079c527f 2672 /* Pattern detected. */
73fbfcad 2673 if (dump_enabled_p ())
78c60e3d 2674 {
ccb3ad87 2675 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 2676 "vect_recog_divmod_pattern: detected: ");
ccb3ad87 2677 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 2678 dump_printf (MSG_NOTE, "\n");
78c60e3d 2679 }
363477c0 2680
9771b263 2681 stmts->safe_push (last_stmt);
363477c0
JJ
2682
2683 *type_in = vectype;
2684 *type_out = vectype;
2685 return pattern_stmt;
2686}
2687
69d2aade
JJ
2688/* Function vect_recog_mixed_size_cond_pattern
2689
2690 Try to find the following pattern:
2691
2692 type x_t, y_t;
2693 TYPE a_T, b_T, c_T;
2694 loop:
2695 S1 a_T = x_t CMP y_t ? b_T : c_T;
2696
2697 where type 'TYPE' is an integral type which has different size
bc4fb355 2698 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
69d2aade 2699 than 'type', the constants need to fit into an integer type
bc4fb355 2700 with the same width as 'type') or results of conversion from 'type'.
69d2aade
JJ
2701
2702 Input:
2703
2704 * LAST_STMT: A stmt from which the pattern search begins.
2705
2706 Output:
2707
2708 * TYPE_IN: The type of the input arguments to the pattern.
2709
2710 * TYPE_OUT: The type of the output of this pattern.
2711
2712 * Return value: A new stmt that will be used to replace the pattern.
2713 Additionally a def_stmt is added.
2714
2715 a_it = x_t CMP y_t ? b_it : c_it;
2716 a_T = (TYPE) a_it; */
2717
355fe088
TS
2718static gimple *
2719vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
69d2aade
JJ
2720 tree *type_out)
2721{
355fe088 2722 gimple *last_stmt = (*stmts)[0];
69d2aade
JJ
2723 tree cond_expr, then_clause, else_clause;
2724 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
bc4fb355 2725 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
355fe088 2726 gimple *pattern_stmt, *def_stmt;
310213d4 2727 vec_info *vinfo = stmt_vinfo->vinfo;
bc4fb355 2728 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
355fe088 2729 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
bc4fb355
IR
2730 bool promotion;
2731 tree comp_scalar_type;
69d2aade
JJ
2732
2733 if (!is_gimple_assign (last_stmt)
2734 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2735 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2736 return NULL;
2737
2738 cond_expr = gimple_assign_rhs1 (last_stmt);
2739 then_clause = gimple_assign_rhs2 (last_stmt);
2740 else_clause = gimple_assign_rhs3 (last_stmt);
2741
87aab9b2
JJ
2742 if (!COMPARISON_CLASS_P (cond_expr))
2743 return NULL;
2744
bc4fb355
IR
2745 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2746 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
87aab9b2 2747 if (comp_vectype == NULL_TREE)
69d2aade
JJ
2748 return NULL;
2749
2750 type = gimple_expr_type (last_stmt);
bc4fb355
IR
2751 if (types_compatible_p (type, comp_scalar_type)
2752 || ((TREE_CODE (then_clause) != INTEGER_CST
2753 || TREE_CODE (else_clause) != INTEGER_CST)
2754 && !INTEGRAL_TYPE_P (comp_scalar_type))
2755 || !INTEGRAL_TYPE_P (type))
2756 return NULL;
2757
2758 if ((TREE_CODE (then_clause) != INTEGER_CST
2759 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2760 &def_stmt0, &promotion))
2761 || (TREE_CODE (else_clause) != INTEGER_CST
2762 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2763 &def_stmt1, &promotion)))
2764 return NULL;
2765
2766 if (orig_type0 && orig_type1
2767 && !types_compatible_p (orig_type0, orig_type1))
2768 return NULL;
2769
2770 if (orig_type0)
2771 {
2772 if (!types_compatible_p (orig_type0, comp_scalar_type))
2773 return NULL;
2774 then_clause = gimple_assign_rhs1 (def_stmt0);
2775 itype = orig_type0;
2776 }
2777
2778 if (orig_type1)
2779 {
2780 if (!types_compatible_p (orig_type1, comp_scalar_type))
2781 return NULL;
2782 else_clause = gimple_assign_rhs1 (def_stmt1);
2783 itype = orig_type1;
2784 }
2785
69d2aade 2786
6c825cd4
DS
2787 HOST_WIDE_INT cmp_mode_size
2788 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
2789
2790 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == cmp_mode_size)
69d2aade
JJ
2791 return NULL;
2792
2793 vectype = get_vectype_for_scalar_type (type);
2794 if (vectype == NULL_TREE)
2795 return NULL;
2796
2797 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2798 return NULL;
2799
bc4fb355 2800 if (itype == NULL_TREE)
6c825cd4 2801 itype = build_nonstandard_integer_type (cmp_mode_size,
bc4fb355
IR
2802 TYPE_UNSIGNED (type));
2803
69d2aade 2804 if (itype == NULL_TREE
6c825cd4 2805 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != cmp_mode_size)
69d2aade
JJ
2806 return NULL;
2807
2808 vecitype = get_vectype_for_scalar_type (itype);
2809 if (vecitype == NULL_TREE)
2810 return NULL;
2811
2812 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2813 return NULL;
2814
6c825cd4 2815 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > cmp_mode_size)
69d2aade 2816 {
bc4fb355
IR
2817 if ((TREE_CODE (then_clause) == INTEGER_CST
2818 && !int_fits_type_p (then_clause, itype))
2819 || (TREE_CODE (else_clause) == INTEGER_CST
2820 && !int_fits_type_p (else_clause, itype)))
69d2aade
JJ
2821 return NULL;
2822 }
2823
0d0e4a03
JJ
2824 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2825 COND_EXPR, unshare_expr (cond_expr),
2826 fold_convert (itype, then_clause),
2827 fold_convert (itype, else_clause));
2828 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2829 NOP_EXPR, gimple_assign_lhs (def_stmt));
69d2aade 2830
083481d8 2831 new_pattern_def_seq (stmt_vinfo, def_stmt);
310213d4 2832 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
69d2aade
JJ
2833 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2834 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2835 *type_in = vecitype;
2836 *type_out = vectype;
2837
73fbfcad 2838 if (dump_enabled_p ())
ccb3ad87 2839 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2840 "vect_recog_mixed_size_cond_pattern: detected:\n");
f5709183 2841
69d2aade
JJ
2842 return pattern_stmt;
2843}
2844
2845
71c92d17 2846/* Helper function of vect_recog_bool_pattern. Called recursively, return
42fd8198
IE
2847 true if bool VAR can and should be optimized that way. Assume it shouldn't
2848 in case it's a result of a comparison which can be directly vectorized into
2849 a vector comparison. */
71c92d17
JJ
2850
2851static bool
310213d4 2852check_bool_pattern (tree var, vec_info *vinfo)
71c92d17 2853{
355fe088 2854 gimple *def_stmt;
71c92d17 2855 enum vect_def_type dt;
81c40241 2856 tree rhs1;
71c92d17
JJ
2857 enum tree_code rhs_code;
2858
81c40241 2859 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
71c92d17
JJ
2860 return false;
2861
2862 if (dt != vect_internal_def)
2863 return false;
2864
2865 if (!is_gimple_assign (def_stmt))
2866 return false;
2867
81c40241 2868 if (!has_single_use (var))
71c92d17
JJ
2869 return false;
2870
2871 rhs1 = gimple_assign_rhs1 (def_stmt);
2872 rhs_code = gimple_assign_rhs_code (def_stmt);
2873 switch (rhs_code)
2874 {
2875 case SSA_NAME:
310213d4 2876 return check_bool_pattern (rhs1, vinfo);
71c92d17
JJ
2877
2878 CASE_CONVERT:
2879 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2880 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2881 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2882 return false;
310213d4 2883 return check_bool_pattern (rhs1, vinfo);
71c92d17
JJ
2884
2885 case BIT_NOT_EXPR:
310213d4 2886 return check_bool_pattern (rhs1, vinfo);
71c92d17
JJ
2887
2888 case BIT_AND_EXPR:
2889 case BIT_IOR_EXPR:
2890 case BIT_XOR_EXPR:
310213d4 2891 if (!check_bool_pattern (rhs1, vinfo))
71c92d17 2892 return false;
310213d4 2893 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo);
71c92d17
JJ
2894
2895 default:
2896 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2897 {
42fd8198 2898 tree vecitype, comp_vectype, mask_type;
71c92d17 2899
2f326699
JJ
2900 /* If the comparison can throw, then is_gimple_condexpr will be
2901 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2902 if (stmt_could_throw_p (def_stmt))
2903 return false;
2904
71c92d17
JJ
2905 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2906 if (comp_vectype == NULL_TREE)
2907 return false;
2908
42fd8198
IE
2909 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
2910 if (mask_type
2911 && expand_vec_cmp_expr_p (comp_vectype, mask_type))
2912 return false;
2913
71c92d17
JJ
2914 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2915 {
ef4bddc2 2916 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
71c92d17 2917 tree itype
ab0ef706 2918 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
71c92d17
JJ
2919 vecitype = get_vectype_for_scalar_type (itype);
2920 if (vecitype == NULL_TREE)
2921 return false;
2922 }
2923 else
2924 vecitype = comp_vectype;
2925 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2926 }
2927 return false;
2928 }
2929}
2930
2931
2932/* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2933 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
363477c0 2934 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
71c92d17
JJ
2935
2936static tree
2937adjust_bool_pattern_cast (tree type, tree var)
2938{
2939 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
355fe088 2940 gimple *cast_stmt, *pattern_stmt;
71c92d17 2941
363477c0 2942 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
71c92d17 2943 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
083481d8 2944 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
0d0e4a03
JJ
2945 cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2946 NOP_EXPR, gimple_assign_lhs (pattern_stmt));
71c92d17
JJ
2947 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2948 return gimple_assign_lhs (cast_stmt);
2949}
2950
2951
2952/* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2953 recursively. VAR is an SSA_NAME that should be transformed from bool
2954 to a wider integer type, OUT_TYPE is the desired final integer type of
2955 the whole pattern, TRUEVAL should be NULL unless optimizing
2956 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2957 in the then_clause, STMTS is where statements with added pattern stmts
2958 should be pushed to. */
2959
2960static tree
2961adjust_bool_pattern (tree var, tree out_type, tree trueval,
355fe088 2962 vec<gimple *> *stmts)
71c92d17 2963{
355fe088 2964 gimple *stmt = SSA_NAME_DEF_STMT (var);
71c92d17
JJ
2965 enum tree_code rhs_code, def_rhs_code;
2966 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2967 location_t loc;
355fe088 2968 gimple *pattern_stmt, *def_stmt;
71c92d17
JJ
2969
2970 rhs1 = gimple_assign_rhs1 (stmt);
2971 rhs2 = gimple_assign_rhs2 (stmt);
2972 rhs_code = gimple_assign_rhs_code (stmt);
2973 loc = gimple_location (stmt);
2974 switch (rhs_code)
2975 {
2976 case SSA_NAME:
2977 CASE_CONVERT:
2978 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2979 itype = TREE_TYPE (irhs1);
2980 pattern_stmt
0d0e4a03
JJ
2981 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2982 SSA_NAME, irhs1);
71c92d17
JJ
2983 break;
2984
2985 case BIT_NOT_EXPR:
2986 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2987 itype = TREE_TYPE (irhs1);
2988 pattern_stmt
0d0e4a03
JJ
2989 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2990 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
71c92d17
JJ
2991 break;
2992
2993 case BIT_AND_EXPR:
2994 /* Try to optimize x = y & (a < b ? 1 : 0); into
2995 x = (a < b ? y : 0);
2996
2997 E.g. for:
2998 bool a_b, b_b, c_b;
2999 TYPE d_T;
3000
3001 S1 a_b = x1 CMP1 y1;
3002 S2 b_b = x2 CMP2 y2;
3003 S3 c_b = a_b & b_b;
3004 S4 d_T = (TYPE) c_b;
3005
3006 we would normally emit:
3007
3008 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3009 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3010 S3' c_T = a_T & b_T;
3011 S4' d_T = c_T;
3012
3013 but we can save one stmt by using the
3014 result of one of the COND_EXPRs in the other COND_EXPR and leave
3015 BIT_AND_EXPR stmt out:
3016
3017 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3018 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3019 S4' f_T = c_T;
3020
3021 At least when VEC_COND_EXPR is implemented using masks
3022 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
3023 computes the comparison masks and ands it, in one case with
3024 all ones vector, in the other case with a vector register.
3025 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3026 often more expensive. */
3027 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3028 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3029 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3030 {
3031 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3032 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3033 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3034 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3035 {
355fe088 3036 gimple *tstmt;
71c92d17
JJ
3037 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3038 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
9771b263 3039 tstmt = stmts->pop ();
71c92d17 3040 gcc_assert (tstmt == def_stmt);
9771b263 3041 stmts->quick_push (stmt);
71c92d17
JJ
3042 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3043 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
363477c0 3044 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
71c92d17
JJ
3045 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3046 return irhs2;
3047 }
3048 else
3049 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3050 goto and_ior_xor;
3051 }
3052 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3053 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3054 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3055 {
3056 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3057 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3058 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3059 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3060 {
355fe088 3061 gimple *tstmt;
71c92d17
JJ
3062 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3063 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
9771b263 3064 tstmt = stmts->pop ();
71c92d17 3065 gcc_assert (tstmt == def_stmt);
9771b263 3066 stmts->quick_push (stmt);
71c92d17
JJ
3067 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3068 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
363477c0 3069 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
71c92d17
JJ
3070 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3071 return irhs1;
3072 }
3073 else
3074 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3075 goto and_ior_xor;
3076 }
3077 /* FALLTHRU */
3078 case BIT_IOR_EXPR:
3079 case BIT_XOR_EXPR:
3080 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3081 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3082 and_ior_xor:
3083 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3084 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3085 {
3086 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3087 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3088 int out_prec = TYPE_PRECISION (out_type);
3089 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3090 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3091 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3092 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3093 else
3094 {
3095 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3096 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3097 }
3098 }
3099 itype = TREE_TYPE (irhs1);
3100 pattern_stmt
0d0e4a03
JJ
3101 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3102 rhs_code, irhs1, irhs2);
71c92d17
JJ
3103 break;
3104
3105 default:
3106 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3107 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
e6a21dd2
JJ
3108 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3109 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3110 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
71c92d17 3111 {
ef4bddc2 3112 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
71c92d17 3113 itype
ab0ef706 3114 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
71c92d17
JJ
3115 }
3116 else
3117 itype = TREE_TYPE (rhs1);
3118 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3119 if (trueval == NULL_TREE)
3120 trueval = build_int_cst (itype, 1);
3121 else
3122 gcc_checking_assert (useless_type_conversion_p (itype,
3123 TREE_TYPE (trueval)));
3124 pattern_stmt
0d0e4a03
JJ
3125 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3126 COND_EXPR, cond_expr, trueval,
3127 build_int_cst (itype, 0));
71c92d17
JJ
3128 break;
3129 }
3130
9771b263 3131 stmts->safe_push (stmt);
71c92d17
JJ
3132 gimple_set_location (pattern_stmt, loc);
3133 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3134 return gimple_assign_lhs (pattern_stmt);
3135}
3136
3137
42fd8198
IE
3138/* Return the proper type for converting bool VAR into
3139 an integer value or NULL_TREE if no such type exists.
3140 The type is chosen so that converted value has the
3141 same number of elements as VAR's vector type. */
3142
3143static tree
3144search_type_for_mask (tree var, vec_info *vinfo)
3145{
3146 gimple *def_stmt;
3147 enum vect_def_type dt;
3148 tree rhs1;
3149 enum tree_code rhs_code;
e6f5c25d 3150 tree res = NULL_TREE, res2;
42fd8198
IE
3151
3152 if (TREE_CODE (var) != SSA_NAME)
3153 return NULL_TREE;
3154
3155 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3156 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3157 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3158 return NULL_TREE;
3159
3160 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3161 return NULL_TREE;
3162
3163 if (dt != vect_internal_def)
3164 return NULL_TREE;
3165
3166 if (!is_gimple_assign (def_stmt))
3167 return NULL_TREE;
3168
3169 rhs_code = gimple_assign_rhs_code (def_stmt);
3170 rhs1 = gimple_assign_rhs1 (def_stmt);
3171
3172 switch (rhs_code)
3173 {
3174 case SSA_NAME:
3175 case BIT_NOT_EXPR:
3176 CASE_CONVERT:
3177 res = search_type_for_mask (rhs1, vinfo);
3178 break;
3179
3180 case BIT_AND_EXPR:
3181 case BIT_IOR_EXPR:
3182 case BIT_XOR_EXPR:
e6f5c25d
IE
3183 res = search_type_for_mask (rhs1, vinfo);
3184 res2 = search_type_for_mask (gimple_assign_rhs2 (def_stmt), vinfo);
3185 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3186 res = res2;
42fd8198
IE
3187 break;
3188
3189 default:
3190 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3191 {
e6f5c25d
IE
3192 tree comp_vectype, mask_type;
3193
3194 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3195 if (comp_vectype == NULL_TREE)
3196 return NULL_TREE;
3197
3198 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3199 if (!mask_type
3200 || !expand_vec_cmp_expr_p (comp_vectype, mask_type))
3201 return NULL_TREE;
3202
42fd8198
IE
3203 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3204 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3205 {
3206 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3207 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3208 }
3209 else
3210 res = TREE_TYPE (rhs1);
3211 }
3212 }
3213
3214 return res;
3215}
3216
3217
71c92d17
JJ
3218/* Function vect_recog_bool_pattern
3219
3220 Try to find pattern like following:
3221
3222 bool a_b, b_b, c_b, d_b, e_b;
3223 TYPE f_T;
3224 loop:
3225 S1 a_b = x1 CMP1 y1;
3226 S2 b_b = x2 CMP2 y2;
3227 S3 c_b = a_b & b_b;
3228 S4 d_b = x3 CMP3 y3;
3229 S5 e_b = c_b | d_b;
3230 S6 f_T = (TYPE) e_b;
3231
52264dbf
RB
3232 where type 'TYPE' is an integral type. Or a similar pattern
3233 ending in
3234
3235 S6 f_Y = e_b ? r_Y : s_Y;
3236
3237 as results from if-conversion of a complex condition.
71c92d17
JJ
3238
3239 Input:
3240
3241 * LAST_STMT: A stmt at the end from which the pattern
3242 search begins, i.e. cast of a bool to
3243 an integer type.
3244
3245 Output:
3246
3247 * TYPE_IN: The type of the input arguments to the pattern.
3248
3249 * TYPE_OUT: The type of the output of this pattern.
3250
3251 * Return value: A new stmt that will be used to replace the pattern.
3252
3253 Assuming size of TYPE is the same as size of all comparisons
3254 (otherwise some casts would be added where needed), the above
3255 sequence we create related pattern stmts:
3256 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3257 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3258 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3259 S5' e_T = c_T | d_T;
3260 S6' f_T = e_T;
3261
3262 Instead of the above S3' we could emit:
3263 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3264 S3' c_T = a_T | b_T;
3265 but the above is more efficient. */
3266
355fe088
TS
3267static gimple *
3268vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
71c92d17
JJ
3269 tree *type_out)
3270{
355fe088 3271 gimple *last_stmt = stmts->pop ();
71c92d17
JJ
3272 enum tree_code rhs_code;
3273 tree var, lhs, rhs, vectype;
3274 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
42fd8198 3275 stmt_vec_info new_stmt_info;
310213d4 3276 vec_info *vinfo = stmt_vinfo->vinfo;
355fe088 3277 gimple *pattern_stmt;
71c92d17
JJ
3278
3279 if (!is_gimple_assign (last_stmt))
3280 return NULL;
3281
3282 var = gimple_assign_rhs1 (last_stmt);
3283 lhs = gimple_assign_lhs (last_stmt);
3284
3285 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3286 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3287 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3288 return NULL;
3289
3290 rhs_code = gimple_assign_rhs_code (last_stmt);
3291 if (CONVERT_EXPR_CODE_P (rhs_code))
3292 {
78048b1c
JJ
3293 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3294 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
71c92d17
JJ
3295 return NULL;
3296 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3297 if (vectype == NULL_TREE)
3298 return NULL;
3299
42fd8198
IE
3300 if (check_bool_pattern (var, vinfo))
3301 {
3302 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3303 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3304 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3305 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3306 else
3307 pattern_stmt
3308 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3309 }
71c92d17 3310 else
42fd8198
IE
3311 {
3312 tree type = search_type_for_mask (var, vinfo);
a414c77f 3313 tree cst0, cst1, tmp;
42fd8198
IE
3314
3315 if (!type)
3316 return NULL;
3317
3318 /* We may directly use cond with narrowed type to avoid
3319 multiple cond exprs with following result packing and
3320 perform single cond with packed mask instead. In case
3321 of widening we better make cond first and then extract
3322 results. */
3323 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3324 type = TREE_TYPE (lhs);
3325
3326 cst0 = build_int_cst (type, 0);
3327 cst1 = build_int_cst (type, 1);
3328 tmp = vect_recog_temp_ssa_var (type, NULL);
a414c77f 3329 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
42fd8198
IE
3330
3331 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3332 {
3333 tree new_vectype = get_vectype_for_scalar_type (type);
3334 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3335 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3336 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3337 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3338
3339 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3340 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3341 }
3342 }
3343
71c92d17
JJ
3344 *type_out = vectype;
3345 *type_in = vectype;
9771b263 3346 stmts->safe_push (last_stmt);
73fbfcad 3347 if (dump_enabled_p ())
ccb3ad87 3348 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3349 "vect_recog_bool_pattern: detected:\n");
f5709183 3350
52264dbf
RB
3351 return pattern_stmt;
3352 }
3353 else if (rhs_code == COND_EXPR
3354 && TREE_CODE (var) == SSA_NAME)
3355 {
3356 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3357 if (vectype == NULL_TREE)
3358 return NULL;
3359
3360 /* Build a scalar type for the boolean result that when
3361 vectorized matches the vector type of the result in
3362 size and number of elements. */
3363 unsigned prec
3364 = wi::udiv_trunc (TYPE_SIZE (vectype),
3365 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3366 tree type
3367 = build_nonstandard_integer_type (prec,
3368 TYPE_UNSIGNED (TREE_TYPE (var)));
3369 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3370 return NULL;
3371
a414c77f
IE
3372 if (!check_bool_pattern (var, vinfo))
3373 return NULL;
3374
3375 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
52264dbf 3376
52264dbf
RB
3377 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3378 pattern_stmt
a414c77f
IE
3379 = gimple_build_assign (lhs, COND_EXPR,
3380 build2 (NE_EXPR, boolean_type_node,
3381 rhs, build_int_cst (type, 0)),
0d0e4a03
JJ
3382 gimple_assign_rhs2 (last_stmt),
3383 gimple_assign_rhs3 (last_stmt));
52264dbf
RB
3384 *type_out = vectype;
3385 *type_in = vectype;
3386 stmts->safe_push (last_stmt);
3387 if (dump_enabled_p ())
3388 dump_printf_loc (MSG_NOTE, vect_location,
3389 "vect_recog_bool_pattern: detected:\n");
3390
71c92d17
JJ
3391 return pattern_stmt;
3392 }
ab0ef706
JJ
3393 else if (rhs_code == SSA_NAME
3394 && STMT_VINFO_DATA_REF (stmt_vinfo))
3395 {
3396 stmt_vec_info pattern_stmt_info;
3397 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3398 gcc_assert (vectype != NULL_TREE);
78336739
JJ
3399 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3400 return NULL;
ab0ef706 3401
42fd8198
IE
3402 if (check_bool_pattern (var, vinfo))
3403 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype),
3404 NULL_TREE, stmts);
3405 else
3406 {
3407 tree type = search_type_for_mask (var, vinfo);
a414c77f 3408 tree cst0, cst1, new_vectype;
42fd8198
IE
3409
3410 if (!type)
3411 return NULL;
3412
3413 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3414 type = TREE_TYPE (vectype);
3415
3416 cst0 = build_int_cst (type, 0);
3417 cst1 = build_int_cst (type, 1);
3418 new_vectype = get_vectype_for_scalar_type (type);
3419
3420 rhs = vect_recog_temp_ssa_var (type, NULL);
a414c77f 3421 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
42fd8198
IE
3422
3423 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3424 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3425 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3426 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3427 }
3428
ab0ef706
JJ
3429 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3430 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3431 {
3432 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
355fe088 3433 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
42fd8198 3434 append_pattern_def_seq (stmt_vinfo, cast_stmt);
ab0ef706
JJ
3435 rhs = rhs2;
3436 }
0d0e4a03 3437 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
310213d4 3438 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
ab0ef706
JJ
3439 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3440 STMT_VINFO_DATA_REF (pattern_stmt_info)
3441 = STMT_VINFO_DATA_REF (stmt_vinfo);
3442 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3443 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3444 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3445 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3446 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3447 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3448 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3449 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
78048b1c 3450 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
ab0ef706
JJ
3451 *type_out = vectype;
3452 *type_in = vectype;
9771b263 3453 stmts->safe_push (last_stmt);
73fbfcad 3454 if (dump_enabled_p ())
ccb3ad87 3455 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3456 "vect_recog_bool_pattern: detected:\n");
ab0ef706
JJ
3457 return pattern_stmt;
3458 }
71c92d17
JJ
3459 else
3460 return NULL;
3461}
3462
3463
e6f5c25d
IE
3464/* A helper for vect_recog_mask_conversion_pattern. Build
3465 conversion of MASK to a type suitable for masking VECTYPE.
3466 Built statement gets required vectype and is appended to
3467 a pattern sequence of STMT_VINFO.
3468
3469 Return converted mask. */
3470
3471static tree
3472build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3473 vec_info *vinfo)
3474{
3475 gimple *stmt;
3476 tree masktype, tmp;
3477 stmt_vec_info new_stmt_info;
3478
3479 masktype = build_same_sized_truth_vector_type (vectype);
3480 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3481 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3482 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3483 set_vinfo_for_stmt (stmt, new_stmt_info);
3484 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3485 append_pattern_def_seq (stmt_vinfo, stmt);
3486
3487 return tmp;
3488}
3489
3490
3491/* Function vect_recog_mask_conversion_pattern
3492
3493 Try to find statements which require boolean type
3494 converison. Additional conversion statements are
3495 added to handle such cases. For example:
3496
3497 bool m_1, m_2, m_3;
3498 int i_4, i_5;
3499 double d_6, d_7;
3500 char c_1, c_2, c_3;
3501
3502 S1 m_1 = i_4 > i_5;
3503 S2 m_2 = d_6 < d_7;
3504 S3 m_3 = m_1 & m_2;
3505 S4 c_1 = m_3 ? c_2 : c_3;
3506
3507 Will be transformed into:
3508
3509 S1 m_1 = i_4 > i_5;
3510 S2 m_2 = d_6 < d_7;
3511 S3'' m_2' = (_Bool[bitsize=32])m_2
3512 S3' m_3' = m_1 & m_2';
3513 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3514 S4' c_1' = m_3'' ? c_2 : c_3; */
3515
3516static gimple *
3517vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
3518 tree *type_out)
3519{
3520 gimple *last_stmt = stmts->pop ();
3521 enum tree_code rhs_code;
3522 tree lhs, rhs1, rhs2, tmp, rhs1_type, rhs2_type, vectype1, vectype2;
3523 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3524 stmt_vec_info pattern_stmt_info;
3525 vec_info *vinfo = stmt_vinfo->vinfo;
3526 gimple *pattern_stmt;
3527
3528 /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion. */
3529 if (is_gimple_call (last_stmt)
3530 && gimple_call_internal_p (last_stmt)
3531 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3532 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3533 {
3534 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3535
3536 if (load)
3537 {
3538 lhs = gimple_call_lhs (last_stmt);
3539 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3540 }
3541 else
3542 {
3543 rhs2 = gimple_call_arg (last_stmt, 3);
3544 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3545 }
3546
3547 rhs1 = gimple_call_arg (last_stmt, 2);
3548 rhs1_type = search_type_for_mask (rhs1, vinfo);
3549 if (!rhs1_type)
3550 return NULL;
3551 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3552
3553 if (!vectype1 || !vectype2
3554 || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
3555 return NULL;
3556
3557 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3558
3559 if (load)
3560 {
3561 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3562 pattern_stmt
3563 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
3564 gimple_call_arg (last_stmt, 0),
3565 gimple_call_arg (last_stmt, 1),
3566 tmp);
3567 gimple_call_set_lhs (pattern_stmt, lhs);
3568 }
3569 else
3570 pattern_stmt
3571 = gimple_build_call_internal (IFN_MASK_STORE, 4,
3572 gimple_call_arg (last_stmt, 0),
3573 gimple_call_arg (last_stmt, 1),
3574 tmp,
3575 gimple_call_arg (last_stmt, 3));
3576
3577
3578 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3579 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3580 STMT_VINFO_DATA_REF (pattern_stmt_info)
3581 = STMT_VINFO_DATA_REF (stmt_vinfo);
3582 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3583 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3584 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3585 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3586 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3587 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3588 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3589 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3590 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3591
3592 *type_out = vectype1;
3593 *type_in = vectype1;
3594 stmts->safe_push (last_stmt);
3595 if (dump_enabled_p ())
3596 dump_printf_loc (MSG_NOTE, vect_location,
3597 "vect_recog_mask_conversion_pattern: detected:\n");
3598
3599 return pattern_stmt;
3600 }
3601
3602 if (!is_gimple_assign (last_stmt))
3603 return NULL;
3604
3605 lhs = gimple_assign_lhs (last_stmt);
3606 rhs1 = gimple_assign_rhs1 (last_stmt);
3607 rhs_code = gimple_assign_rhs_code (last_stmt);
3608
3609 /* Check for cond expression requiring mask conversion. */
3610 if (rhs_code == COND_EXPR)
3611 {
3612 /* vect_recog_mixed_size_cond_pattern could apply.
3613 Do nothing then. */
3614 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
3615 return NULL;
3616
3617 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3618
3619 if (TREE_CODE (rhs1) == SSA_NAME)
3620 {
3621 rhs1_type = search_type_for_mask (rhs1, vinfo);
3622 if (!rhs1_type)
3623 return NULL;
3624 }
3625 else
3626 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
3627
3628 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3629
3630 if (!vectype1 || !vectype2
3631 || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
3632 return NULL;
3633
3634 /* If rhs1 is a comparison we need to move it into a
3635 separate statement. */
3636 if (TREE_CODE (rhs1) != SSA_NAME)
3637 {
3638 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
3639 pattern_stmt = gimple_build_assign (tmp, rhs1);
3640 rhs1 = tmp;
3641
3642 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3643 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3644 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
3645 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3646 }
3647
3648 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3649
3650 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3651 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
3652 gimple_assign_rhs2 (last_stmt),
3653 gimple_assign_rhs3 (last_stmt));
3654
3655 *type_out = vectype1;
3656 *type_in = vectype1;
3657 stmts->safe_push (last_stmt);
3658 if (dump_enabled_p ())
3659 dump_printf_loc (MSG_NOTE, vect_location,
3660 "vect_recog_mask_conversion_pattern: detected:\n");
3661
3662 return pattern_stmt;
3663 }
3664
3665 /* Now check for binary boolean operations requiring conversion for
3666 one of operands. */
3667 if (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE)
3668 return NULL;
3669
3670 if (rhs_code != BIT_IOR_EXPR
3671 && rhs_code != BIT_XOR_EXPR
3672 && rhs_code != BIT_AND_EXPR)
3673 return NULL;
3674
3675 rhs2 = gimple_assign_rhs2 (last_stmt);
3676
3677 rhs1_type = search_type_for_mask (rhs1, vinfo);
3678 rhs2_type = search_type_for_mask (rhs2, vinfo);
3679
3680 if (!rhs1_type || !rhs2_type
3681 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
3682 return NULL;
3683
3684 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
3685 {
3686 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
3687 if (!vectype1)
3688 return NULL;
3689 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
3690 }
3691 else
3692 {
3693 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
3694 if (!vectype1)
3695 return NULL;
3696 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3697 }
3698
3699 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3700 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
3701
3702 *type_out = vectype1;
3703 *type_in = vectype1;
3704 stmts->safe_push (last_stmt);
3705 if (dump_enabled_p ())
3706 dump_printf_loc (MSG_NOTE, vect_location,
3707 "vect_recog_mask_conversion_pattern: detected:\n");
3708
3709 return pattern_stmt;
3710}
3711
3712
1107f3ae
IR
3713/* Mark statements that are involved in a pattern. */
3714
3715static inline void
355fe088 3716vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
1107f3ae
IR
3717 tree pattern_vectype)
3718{
3719 stmt_vec_info pattern_stmt_info, def_stmt_info;
3720 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
310213d4 3721 vec_info *vinfo = orig_stmt_info->vinfo;
355fe088 3722 gimple *def_stmt;
1107f3ae 3723
1107f3ae 3724 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
ab0ef706
JJ
3725 if (pattern_stmt_info == NULL)
3726 {
310213d4 3727 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
ab0ef706
JJ
3728 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3729 }
3730 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
1107f3ae
IR
3731
3732 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3733 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
ab0ef706 3734 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
1107f3ae
IR
3735 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3736 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3737 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
363477c0
JJ
3738 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3739 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3740 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
1107f3ae 3741 {
363477c0
JJ
3742 gimple_stmt_iterator si;
3743 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3744 !gsi_end_p (si); gsi_next (&si))
69d2aade 3745 {
363477c0
JJ
3746 def_stmt = gsi_stmt (si);
3747 def_stmt_info = vinfo_for_stmt (def_stmt);
3748 if (def_stmt_info == NULL)
3749 {
310213d4 3750 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
363477c0
JJ
3751 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3752 }
3753 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3754 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
56f8faae 3755 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
363477c0
JJ
3756 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3757 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
69d2aade 3758 }
1107f3ae
IR
3759 }
3760}
3761
b8698a0f 3762/* Function vect_pattern_recog_1
20f06221
DN
3763
3764 Input:
3765 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3766 computation pattern.
3767 STMT: A stmt from which the pattern search should start.
3768
3769 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
b8698a0f
L
3770 expression that computes the same functionality and can be used to
3771 replace the sequence of stmts that are involved in the pattern.
20f06221
DN
3772
3773 Output:
b8698a0f
L
3774 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3775 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3776 relevant vector type. If 'TYPE_IN' is already a vector type, then this
20f06221
DN
3777 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3778 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3779 to the available target pattern.
3780
b8698a0f 3781 This function also does some bookkeeping, as explained in the documentation
20f06221
DN
3782 for vect_recog_pattern. */
3783
3784static void
92aea285
JJ
3785vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3786 gimple_stmt_iterator si,
355fe088 3787 vec<gimple *> *stmts_to_replace)
20f06221 3788{
355fe088 3789 gimple *stmt = gsi_stmt (si), *pattern_stmt;
383d9c83 3790 stmt_vec_info stmt_info;
383d9c83 3791 loop_vec_info loop_vinfo;
20f06221
DN
3792 tree pattern_vectype;
3793 tree type_in, type_out;
20f06221 3794 enum tree_code code;
b5aeb3bb 3795 int i;
355fe088 3796 gimple *next;
20f06221 3797
9771b263
DN
3798 stmts_to_replace->truncate (0);
3799 stmts_to_replace->quick_push (stmt);
d1fc143d 3800 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
726a989a 3801 if (!pattern_stmt)
b8698a0f
L
3802 return;
3803
9771b263 3804 stmt = stmts_to_replace->last ();
383d9c83
IR
3805 stmt_info = vinfo_for_stmt (stmt);
3806 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3807
e6f5c25d
IE
3808 if (VECTOR_BOOLEAN_TYPE_P (type_in)
3809 || VECTOR_MODE_P (TYPE_MODE (type_in)))
b8698a0f
L
3810 {
3811 /* No need to check target support (already checked by the pattern
3812 recognition function). */
b690cc0f 3813 pattern_vectype = type_out ? type_out : type_in;
20f06221
DN
3814 }
3815 else
3816 {
ef4bddc2 3817 machine_mode vec_mode;
20f06221
DN
3818 enum insn_code icode;
3819 optab optab;
3820
3821 /* Check target support */
b690cc0f
RG
3822 type_in = get_vectype_for_scalar_type (type_in);
3823 if (!type_in)
3824 return;
3825 if (type_out)
3826 type_out = get_vectype_for_scalar_type (type_out);
3827 else
3828 type_out = type_in;
15bbc165
AO
3829 if (!type_out)
3830 return;
b690cc0f 3831 pattern_vectype = type_out;
03d3e953 3832
726a989a
RB
3833 if (is_gimple_assign (pattern_stmt))
3834 code = gimple_assign_rhs_code (pattern_stmt);
3835 else
3836 {
3837 gcc_assert (is_gimple_call (pattern_stmt));
3838 code = CALL_EXPR;
3839 }
3840
b690cc0f
RG
3841 optab = optab_for_tree_code (code, type_in, optab_default);
3842 vec_mode = TYPE_MODE (type_in);
20f06221 3843 if (!optab
947131ba 3844 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
b690cc0f 3845 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
20f06221
DN
3846 return;
3847 }
3848
3849 /* Found a vectorizable pattern. */
73fbfcad 3850 if (dump_enabled_p ())
20f06221 3851 {
ccb3ad87 3852 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 3853 "pattern recognized: ");
ccb3ad87 3854 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
20f06221 3855 }
b8698a0f 3856
726a989a 3857 /* Mark the stmts that are involved in the pattern. */
1107f3ae 3858 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
20f06221 3859
b5aeb3bb
IR
3860 /* Patterns cannot be vectorized using SLP, because they change the order of
3861 computation. */
f5709183 3862 if (loop_vinfo)
9771b263 3863 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
f5709183 3864 if (next == stmt)
9771b263 3865 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
51312233 3866
1107f3ae
IR
3867 /* It is possible that additional pattern stmts are created and inserted in
3868 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3869 relevant statements. */
9771b263
DN
3870 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3871 && (unsigned) i < (stmts_to_replace->length () - 1);
51312233
IR
3872 i++)
3873 {
3874 stmt_info = vinfo_for_stmt (stmt);
3875 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
73fbfcad 3876 if (dump_enabled_p ())
51312233 3877 {
ccb3ad87 3878 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 3879 "additional pattern stmt: ");
ccb3ad87 3880 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
51312233
IR
3881 }
3882
1107f3ae 3883 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
51312233 3884 }
20f06221
DN
3885}
3886
3887
3888/* Function vect_pattern_recog
3889
3890 Input:
3891 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3892 computation idioms.
3893
9d5e7640
IR
3894 Output - for each computation idiom that is detected we create a new stmt
3895 that provides the same functionality and that can be vectorized. We
20f06221
DN
3896 also record some information in the struct_stmt_info of the relevant
3897 stmts, as explained below:
3898
3899 At the entry to this function we have the following stmts, with the
3900 following initial value in the STMT_VINFO fields:
3901
3902 stmt in_pattern_p related_stmt vec_stmt
3903 S1: a_i = .... - - -
3904 S2: a_2 = ..use(a_i).. - - -
3905 S3: a_1 = ..use(a_2).. - - -
3906 S4: a_0 = ..use(a_1).. - - -
3907 S5: ... = ..use(a_0).. - - -
3908
3909 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
9d5e7640
IR
3910 represented by a single stmt. We then:
3911 - create a new stmt S6 equivalent to the pattern (the stmt is not
3912 inserted into the code)
20f06221
DN
3913 - fill in the STMT_VINFO fields as follows:
3914
3915 in_pattern_p related_stmt vec_stmt
b8698a0f 3916 S1: a_i = .... - - -
20f06221
DN
3917 S2: a_2 = ..use(a_i).. - - -
3918 S3: a_1 = ..use(a_2).. - - -
20f06221 3919 S4: a_0 = ..use(a_1).. true S6 -
9d5e7640 3920 '---> S6: a_new = .... - S4 -
20f06221
DN
3921 S5: ... = ..use(a_0).. - - -
3922
3923 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
9d5e7640 3924 to each other through the RELATED_STMT field).
20f06221
DN
3925
3926 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3927 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3928 remain irrelevant unless used by stmts other than S4.
3929
3930 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
9d5e7640 3931 (because they are marked as irrelevant). It will vectorize S6, and record
83197f37
IR
3932 a pointer to the new vector stmt VS6 from S6 (as usual).
3933 S4 will be skipped, and S5 will be vectorized as usual:
20f06221
DN
3934
3935 in_pattern_p related_stmt vec_stmt
3936 S1: a_i = .... - - -
3937 S2: a_2 = ..use(a_i).. - - -
3938 S3: a_1 = ..use(a_2).. - - -
3939 > VS6: va_new = .... - - -
20f06221 3940 S4: a_0 = ..use(a_1).. true S6 VS6
9d5e7640 3941 '---> S6: a_new = .... - S4 VS6
20f06221
DN
3942 > VS5: ... = ..vuse(va_new).. - - -
3943 S5: ... = ..use(a_0).. - - -
3944
9d5e7640 3945 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
20f06221
DN
3946 elsewhere), and we'll end up with:
3947
b8698a0f 3948 VS6: va_new = ....
83197f37
IR
3949 VS5: ... = ..vuse(va_new)..
3950
3951 In case of more than one pattern statements, e.g., widen-mult with
3952 intermediate type:
3953
3954 S1 a_t = ;
3955 S2 a_T = (TYPE) a_t;
3956 '--> S3: a_it = (interm_type) a_t;
3957 S4 prod_T = a_T * CONST;
3958 '--> S5: prod_T' = a_it w* CONST;
3959
3960 there may be other users of a_T outside the pattern. In that case S2 will
3961 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3962 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3963 be recorded in S3. */
20f06221
DN
3964
3965void
310213d4 3966vect_pattern_recog (vec_info *vinfo)
20f06221 3967{
f5709183 3968 struct loop *loop;
772e61e1 3969 basic_block *bbs;
f5709183 3970 unsigned int nbbs;
726a989a 3971 gimple_stmt_iterator si;
20f06221 3972 unsigned int i, j;
92aea285 3973 vect_recog_func_ptr vect_recog_func;
355fe088
TS
3974 auto_vec<gimple *, 1> stmts_to_replace;
3975 gimple *stmt;
20f06221 3976
73fbfcad 3977 if (dump_enabled_p ())
78c60e3d 3978 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3979 "=== vect_pattern_recog ===\n");
20f06221 3980
310213d4 3981 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
f5709183
IR
3982 {
3983 loop = LOOP_VINFO_LOOP (loop_vinfo);
3984 bbs = LOOP_VINFO_BBS (loop_vinfo);
3985 nbbs = loop->num_nodes;
61d371eb
RB
3986
3987 /* Scan through the loop stmts, applying the pattern recognition
3988 functions starting at each stmt visited: */
3989 for (i = 0; i < nbbs; i++)
3990 {
3991 basic_block bb = bbs[i];
3992 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3993 {
3994 /* Scan over all generic vect_recog_xxx_pattern functions. */
3995 for (j = 0; j < NUM_PATTERNS; j++)
3996 {
3997 vect_recog_func = vect_vect_recog_func_ptrs[j];
3998 vect_pattern_recog_1 (vect_recog_func, si,
3999 &stmts_to_replace);
4000 }
4001 }
4002 }
f5709183
IR
4003 }
4004 else
4005 {
61d371eb
RB
4006 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4007 for (si = bb_vinfo->region_begin;
4008 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4009 {
4010 if ((stmt = gsi_stmt (si))
f5709183
IR
4011 && vinfo_for_stmt (stmt)
4012 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
61d371eb 4013 continue;
f5709183 4014
61d371eb
RB
4015 /* Scan over all generic vect_recog_xxx_pattern functions. */
4016 for (j = 0; j < NUM_PATTERNS; j++)
4017 {
92aea285
JJ
4018 vect_recog_func = vect_vect_recog_func_ptrs[j];
4019 vect_pattern_recog_1 (vect_recog_func, si,
d1fc143d 4020 &stmts_to_replace);
61d371eb
RB
4021 }
4022 }
20f06221
DN
4023 }
4024}