]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-patterns.c
sse.md (*vec_concatv2df): Declare added alternatives as sselog type.
[thirdparty/gcc.git] / gcc / tree-vect-patterns.c
CommitLineData
20f06221 1/* Analysis Utilities for Loop Vectorization.
5624e564 2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
20f06221
DN
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9dcd6f09 9Software Foundation; either version 3, or (at your option) any later
20f06221
DN
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
9dcd6f09
NC
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20f06221
DN
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
c7131fb2 24#include "backend.h"
20f06221 25#include "tree.h"
c7131fb2
AM
26#include "gimple.h"
27#include "rtl.h"
28#include "ssa.h"
29#include "alias.h"
40e23961 30#include "fold-const.h"
d8a2d370 31#include "stor-layout.h"
20f06221 32#include "target.h"
60393bbc 33#include "dominance.h"
cf835838 34#include "gimple-pretty-print.h"
2fb9a547
AM
35#include "internal-fn.h"
36#include "tree-eh.h"
45b0be94 37#include "gimplify.h"
5be5c238 38#include "gimple-iterator.h"
20f06221 39#include "cfgloop.h"
36566b39 40#include "flags.h"
36566b39
PK
41#include "insn-config.h"
42#include "expmed.h"
43#include "dojump.h"
44#include "explow.h"
45#include "calls.h"
46#include "emit-rtl.h"
47#include "varasm.h"
48#include "stmt.h"
20f06221 49#include "expr.h"
b0710fe1 50#include "insn-codes.h"
20f06221
DN
51#include "optabs.h"
52#include "params.h"
53#include "tree-data-ref.h"
54#include "tree-vectorizer.h"
7ee2468b 55#include "recog.h" /* FIXME: for insn_data */
718f9c0f 56#include "diagnostic-core.h"
7ee2468b 57#include "dumpfile.h"
9b2b7279 58#include "builtins.h"
20f06221 59
20f06221 60/* Pattern recognition functions */
9771b263 61static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
51312233 62 tree *);
9771b263 63static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
51312233 64 tree *);
9771b263 65static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
51312233 66 tree *);
79d652a5
CH
67static gimple vect_recog_sad_pattern (vec<gimple> *, tree *,
68 tree *);
9771b263
DN
69static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
70static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
1107f3ae 71 tree *);
9771b263 72static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
36ba4aae 73 tree *, tree *);
7e9a3abb 74static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
9771b263 75static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
732a0ad3 76 tree *, tree *);
9771b263 77static gimple vect_recog_divmod_pattern (vec<gimple> *,
079c527f 78 tree *, tree *);
9771b263 79static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
69d2aade 80 tree *, tree *);
9771b263 81static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
20f06221
DN
82static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
83 vect_recog_widen_mult_pattern,
84 vect_recog_widen_sum_pattern,
0b2229b0 85 vect_recog_dot_prod_pattern,
79d652a5 86 vect_recog_sad_pattern,
1107f3ae 87 vect_recog_pow_pattern,
36ba4aae 88 vect_recog_widen_shift_pattern,
33018845 89 vect_recog_over_widening_pattern,
7e9a3abb 90 vect_recog_rotate_pattern,
732a0ad3 91 vect_recog_vector_vector_shift_pattern,
079c527f 92 vect_recog_divmod_pattern,
71c92d17
JJ
93 vect_recog_mixed_size_cond_pattern,
94 vect_recog_bool_pattern};
20f06221 95
083481d8
JJ
96static inline void
97append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
98{
a1a6c5b2
JJ
99 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
100 stmt);
083481d8
JJ
101}
102
103static inline void
104new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
105{
106 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
107 append_pattern_def_seq (stmt_info, stmt);
108}
109
f71cf56a
UW
110/* Check whether STMT2 is in the same loop or basic block as STMT1.
111 Which of the two applies depends on whether we're currently doing
112 loop-based or basic-block-based vectorization, as determined by
113 the vinfo_for_stmt for STMT1 (which must be defined).
114
115 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
116 to be defined as well. */
117
118static bool
119vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
120{
121 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
122 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
123 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
124
125 if (!gimple_bb (stmt2))
126 return false;
127
128 if (loop_vinfo)
129 {
130 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
131 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
132 return false;
133 }
134 else
135 {
136 if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
137 || gimple_code (stmt2) == GIMPLE_PHI)
138 return false;
139 }
140
141 gcc_assert (vinfo_for_stmt (stmt2));
142 return true;
143}
144
9a7a4398
UW
145/* If the LHS of DEF_STMT has a single use, and that statement is
146 in the same loop or basic block, return it. */
147
148static gimple
149vect_single_imm_use (gimple def_stmt)
150{
151 tree lhs = gimple_assign_lhs (def_stmt);
152 use_operand_p use_p;
153 gimple use_stmt;
154
155 if (!single_imm_use (lhs, &use_p, &use_stmt))
156 return NULL;
157
158 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
159 return NULL;
160
161 return use_stmt;
162}
163
bc4fb355 164/* Check whether NAME, an ssa-name used in USE_STMT,
79d652a5 165 is a result of a type promotion, such that:
20f06221 166 DEF_STMT: NAME = NOP (name0)
383d9c83
IR
167 If CHECK_SIGN is TRUE, check that either both types are signed or both are
168 unsigned. */
20f06221
DN
169
170static bool
bc4fb355
IR
171type_conversion_p (tree name, gimple use_stmt, bool check_sign,
172 tree *orig_type, gimple *def_stmt, bool *promotion)
20f06221
DN
173{
174 tree dummy;
726a989a 175 gimple dummy_gimple;
20f06221
DN
176 loop_vec_info loop_vinfo;
177 stmt_vec_info stmt_vinfo;
20f06221
DN
178 tree type = TREE_TYPE (name);
179 tree oprnd0;
180 enum vect_def_type dt;
181 tree def;
f5709183 182 bb_vec_info bb_vinfo;
20f06221
DN
183
184 stmt_vinfo = vinfo_for_stmt (use_stmt);
185 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183
IR
186 bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
187 if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
188 &def, &dt))
20f06221
DN
189 return false;
190
8644a673
IR
191 if (dt != vect_internal_def
192 && dt != vect_external_def && dt != vect_constant_def)
20f06221
DN
193 return false;
194
bc4fb355 195 if (!*def_stmt)
20f06221
DN
196 return false;
197
726a989a 198 if (!is_gimple_assign (*def_stmt))
20f06221
DN
199 return false;
200
bc4fb355 201 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
20f06221
DN
202 return false;
203
726a989a 204 oprnd0 = gimple_assign_rhs1 (*def_stmt);
20f06221 205
bc4fb355
IR
206 *orig_type = TREE_TYPE (oprnd0);
207 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
208 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
209 return false;
210
211 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
212 *promotion = true;
bc4fb355 213 else
79d652a5 214 *promotion = false;
20f06221 215
24ee1384 216 if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
f5709183 217 bb_vinfo, &dummy_gimple, &dummy, &dt))
20f06221
DN
218 return false;
219
20f06221
DN
220 return true;
221}
222
726a989a
RB
223/* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
224 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
225
226static tree
227vect_recog_temp_ssa_var (tree type, gimple stmt)
228{
83d5977e 229 return make_temp_ssa_name (type, stmt, "patt");
726a989a 230}
20f06221
DN
231
232/* Function vect_recog_dot_prod_pattern
233
234 Try to find the following pattern:
235
236 type x_t, y_t;
237 TYPE1 prod;
238 TYPE2 sum = init;
239 loop:
240 sum_0 = phi <init, sum_1>
241 S1 x_t = ...
242 S2 y_t = ...
243 S3 x_T = (TYPE1) x_t;
244 S4 y_T = (TYPE1) y_t;
245 S5 prod = x_T * y_T;
246 [S6 prod = (TYPE2) prod; #optional]
247 S7 sum_1 = prod + sum_0;
248
b8698a0f
L
249 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
250 same size of 'TYPE1' or bigger. This is a special case of a reduction
20f06221 251 computation.
b8698a0f 252
20f06221
DN
253 Input:
254
51312233
IR
255 * STMTS: Contains a stmt from which the pattern search begins. In the
256 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
257 will be detected.
20f06221
DN
258
259 Output:
260
261 * TYPE_IN: The type of the input arguments to the pattern.
262
263 * TYPE_OUT: The type of the output of this pattern.
264
265 * Return value: A new stmt that will be used to replace the sequence of
266 stmts that constitute the pattern. In this case it will be:
267 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
d29de1bf
DN
268
269 Note: The dot-prod idiom is a widening reduction pattern that is
270 vectorized without preserving all the intermediate results. It
271 produces only N/2 (widened) results (by summing up pairs of
272 intermediate results) rather than all N results. Therefore, we
273 cannot allow this pattern when we want to get all the results and in
274 the correct order (as is the case when this computation is in an
275 inner-loop nested in an outer-loop that us being vectorized). */
20f06221 276
726a989a 277static gimple
9771b263 278vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
51312233 279 tree *type_out)
20f06221 280{
9771b263 281 gimple stmt, last_stmt = (*stmts)[0];
20f06221
DN
282 tree oprnd0, oprnd1;
283 tree oprnd00, oprnd01;
51312233 284 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
20f06221 285 tree type, half_type;
726a989a 286 gimple pattern_stmt;
20f06221 287 tree prod_type;
d29de1bf 288 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 289 struct loop *loop;
f471fe72 290 tree var;
bc4fb355 291 bool promotion;
20f06221 292
f5709183
IR
293 if (!loop_info)
294 return NULL;
295
296 loop = LOOP_VINFO_LOOP (loop_info);
297
328dc477
RB
298 /* We don't allow changing the order of the computation in the inner-loop
299 when doing outer-loop vectorization. */
300 if (loop && nested_in_vect_loop_p (loop, last_stmt))
301 return NULL;
302
51312233 303 if (!is_gimple_assign (last_stmt))
20f06221
DN
304 return NULL;
305
51312233 306 type = gimple_expr_type (last_stmt);
20f06221 307
b8698a0f 308 /* Look for the following pattern
20f06221
DN
309 DX = (TYPE1) X;
310 DY = (TYPE1) Y;
b8698a0f 311 DPROD = DX * DY;
20f06221
DN
312 DDPROD = (TYPE2) DPROD;
313 sum_1 = DDPROD + sum_0;
b8698a0f 314 In which
20f06221
DN
315 - DX is double the size of X
316 - DY is double the size of Y
317 - DX, DY, DPROD all have the same type
318 - sum is the same size of DPROD or bigger
319 - sum has been recognized as a reduction variable.
320
321 This is equivalent to:
322 DPROD = X w* Y; #widen mult
323 sum_1 = DPROD w+ sum_0; #widen summation
324 or
325 DPROD = X w* Y; #widen mult
326 sum_1 = DPROD + sum_0; #summation
327 */
328
329 /* Starting from LAST_STMT, follow the defs of its uses in search
330 of the above pattern. */
331
51312233 332 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
20f06221
DN
333 return NULL;
334
335 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
336 {
337 /* Has been detected as widening-summation? */
338
339 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
726a989a
RB
340 type = gimple_expr_type (stmt);
341 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
20f06221 342 return NULL;
726a989a
RB
343 oprnd0 = gimple_assign_rhs1 (stmt);
344 oprnd1 = gimple_assign_rhs2 (stmt);
20f06221
DN
345 half_type = TREE_TYPE (oprnd0);
346 }
347 else
348 {
726a989a 349 gimple def_stmt;
20f06221 350
51312233
IR
351 oprnd0 = gimple_assign_rhs1 (last_stmt);
352 oprnd1 = gimple_assign_rhs2 (last_stmt);
9600efe1
MM
353 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
354 || !types_compatible_p (TREE_TYPE (oprnd1), type))
20f06221 355 return NULL;
51312233 356 stmt = last_stmt;
20f06221 357
bc4fb355
IR
358 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
359 &promotion)
360 && promotion)
20f06221
DN
361 {
362 stmt = def_stmt;
726a989a 363 oprnd0 = gimple_assign_rhs1 (stmt);
20f06221
DN
364 }
365 else
366 half_type = type;
367 }
368
51312233 369 /* So far so good. Since last_stmt was detected as a (summation) reduction,
20f06221
DN
370 we know that oprnd1 is the reduction variable (defined by a loop-header
371 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
372 Left to check that oprnd0 is defined by a (widen_)mult_expr */
ba02d3bc
RG
373 if (TREE_CODE (oprnd0) != SSA_NAME)
374 return NULL;
20f06221
DN
375
376 prod_type = half_type;
377 stmt = SSA_NAME_DEF_STMT (oprnd0);
3cb35c12
CF
378
379 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
75264e61 380 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
3cb35c12
CF
381 return NULL;
382
b8698a0f 383 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
8665227f 384 inside the loop (in case we are analyzing an outer-loop). */
726a989a 385 if (!is_gimple_assign (stmt))
b8698a0f 386 return NULL;
20f06221
DN
387 stmt_vinfo = vinfo_for_stmt (stmt);
388 gcc_assert (stmt_vinfo);
8644a673 389 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
b3130586 390 return NULL;
726a989a 391 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
20f06221
DN
392 return NULL;
393 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
394 {
395 /* Has been detected as a widening multiplication? */
396
397 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
726a989a 398 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
20f06221
DN
399 return NULL;
400 stmt_vinfo = vinfo_for_stmt (stmt);
401 gcc_assert (stmt_vinfo);
8644a673 402 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
726a989a
RB
403 oprnd00 = gimple_assign_rhs1 (stmt);
404 oprnd01 = gimple_assign_rhs2 (stmt);
56f8faae
CH
405 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
406 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
20f06221
DN
407 }
408 else
409 {
410 tree half_type0, half_type1;
726a989a 411 gimple def_stmt;
20f06221
DN
412 tree oprnd0, oprnd1;
413
726a989a
RB
414 oprnd0 = gimple_assign_rhs1 (stmt);
415 oprnd1 = gimple_assign_rhs2 (stmt);
9600efe1
MM
416 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
417 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
20f06221 418 return NULL;
bc4fb355
IR
419 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
420 &promotion)
421 || !promotion)
20f06221 422 return NULL;
726a989a 423 oprnd00 = gimple_assign_rhs1 (def_stmt);
181f5f3e 424 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
bc4fb355
IR
425 &promotion)
426 || !promotion)
20f06221 427 return NULL;
726a989a 428 oprnd01 = gimple_assign_rhs1 (def_stmt);
9600efe1 429 if (!types_compatible_p (half_type0, half_type1))
20f06221
DN
430 return NULL;
431 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
432 return NULL;
433 }
434
435 half_type = TREE_TYPE (oprnd00);
436 *type_in = half_type;
437 *type_out = type;
b8698a0f 438
20f06221 439 /* Pattern detected. Create a stmt to be used to replace the pattern: */
726a989a 440 var = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
441 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
442 oprnd00, oprnd01, oprnd1);
b8698a0f 443
73fbfcad 444 if (dump_enabled_p ())
20f06221 445 {
ccb3ad87 446 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 447 "vect_recog_dot_prod_pattern: detected: ");
ccb3ad87 448 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 449 dump_printf (MSG_NOTE, "\n");
20f06221 450 }
d29de1bf 451
726a989a 452 return pattern_stmt;
20f06221 453}
b8698a0f 454
51312233 455
79d652a5
CH
456/* Function vect_recog_sad_pattern
457
458 Try to find the following Sum of Absolute Difference (SAD) pattern:
459
460 type x_t, y_t;
461 signed TYPE1 diff, abs_diff;
462 TYPE2 sum = init;
463 loop:
464 sum_0 = phi <init, sum_1>
465 S1 x_t = ...
466 S2 y_t = ...
467 S3 x_T = (TYPE1) x_t;
468 S4 y_T = (TYPE1) y_t;
469 S5 diff = x_T - y_T;
470 S6 abs_diff = ABS_EXPR <diff>;
471 [S7 abs_diff = (TYPE2) abs_diff; #optional]
472 S8 sum_1 = abs_diff + sum_0;
473
474 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
475 same size of 'TYPE1' or bigger. This is a special case of a reduction
476 computation.
477
478 Input:
479
480 * STMTS: Contains a stmt from which the pattern search begins. In the
481 example, when this function is called with S8, the pattern
482 {S3,S4,S5,S6,S7,S8} will be detected.
483
484 Output:
485
486 * TYPE_IN: The type of the input arguments to the pattern.
487
488 * TYPE_OUT: The type of the output of this pattern.
489
490 * Return value: A new stmt that will be used to replace the sequence of
491 stmts that constitute the pattern. In this case it will be:
492 SAD_EXPR <x_t, y_t, sum_0>
493 */
494
495static gimple
496vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in,
497 tree *type_out)
498{
499 gimple last_stmt = (*stmts)[0];
500 tree sad_oprnd0, sad_oprnd1;
501 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
502 tree half_type;
503 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
504 struct loop *loop;
505 bool promotion;
506
507 if (!loop_info)
508 return NULL;
509
510 loop = LOOP_VINFO_LOOP (loop_info);
511
328dc477
RB
512 /* We don't allow changing the order of the computation in the inner-loop
513 when doing outer-loop vectorization. */
514 if (loop && nested_in_vect_loop_p (loop, last_stmt))
515 return NULL;
516
79d652a5
CH
517 if (!is_gimple_assign (last_stmt))
518 return NULL;
519
520 tree sum_type = gimple_expr_type (last_stmt);
521
522 /* Look for the following pattern
523 DX = (TYPE1) X;
524 DY = (TYPE1) Y;
525 DDIFF = DX - DY;
526 DAD = ABS_EXPR <DDIFF>;
527 DDPROD = (TYPE2) DPROD;
528 sum_1 = DAD + sum_0;
529 In which
530 - DX is at least double the size of X
531 - DY is at least double the size of Y
532 - DX, DY, DDIFF, DAD all have the same type
533 - sum is the same size of DAD or bigger
534 - sum has been recognized as a reduction variable.
535
536 This is equivalent to:
537 DDIFF = X w- Y; #widen sub
538 DAD = ABS_EXPR <DDIFF>;
539 sum_1 = DAD w+ sum_0; #widen summation
540 or
541 DDIFF = X w- Y; #widen sub
542 DAD = ABS_EXPR <DDIFF>;
543 sum_1 = DAD + sum_0; #summation
544 */
545
546 /* Starting from LAST_STMT, follow the defs of its uses in search
547 of the above pattern. */
548
549 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
550 return NULL;
551
552 tree plus_oprnd0, plus_oprnd1;
553
554 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
555 {
556 /* Has been detected as widening-summation? */
557
558 gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
559 sum_type = gimple_expr_type (stmt);
560 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
561 return NULL;
562 plus_oprnd0 = gimple_assign_rhs1 (stmt);
563 plus_oprnd1 = gimple_assign_rhs2 (stmt);
564 half_type = TREE_TYPE (plus_oprnd0);
565 }
566 else
567 {
568 gimple def_stmt;
569
79d652a5
CH
570 plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
571 plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
572 if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
573 || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
574 return NULL;
575
576 /* The type conversion could be promotion, demotion,
577 or just signed -> unsigned. */
578 if (type_conversion_p (plus_oprnd0, last_stmt, false,
579 &half_type, &def_stmt, &promotion))
580 plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
581 else
582 half_type = sum_type;
583 }
584
585 /* So far so good. Since last_stmt was detected as a (summation) reduction,
586 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
587 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
588 Then check that plus_oprnd0 is defined by an abs_expr. */
589
590 if (TREE_CODE (plus_oprnd0) != SSA_NAME)
591 return NULL;
592
593 tree abs_type = half_type;
594 gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
595
596 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
597 if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
598 return NULL;
599
600 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
601 inside the loop (in case we are analyzing an outer-loop). */
602 if (!is_gimple_assign (abs_stmt))
603 return NULL;
604
605 stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
606 gcc_assert (abs_stmt_vinfo);
607 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
608 return NULL;
609 if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
610 return NULL;
611
612 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
613 if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
614 return NULL;
615 if (TYPE_UNSIGNED (abs_type))
616 return NULL;
617
618 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
619
620 if (TREE_CODE (abs_oprnd) != SSA_NAME)
621 return NULL;
622
623 gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
624
625 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
626 if (!gimple_bb (diff_stmt)
627 || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
628 return NULL;
629
630 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
631 inside the loop (in case we are analyzing an outer-loop). */
632 if (!is_gimple_assign (diff_stmt))
633 return NULL;
634
635 stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
636 gcc_assert (diff_stmt_vinfo);
637 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
638 return NULL;
639 if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
640 return NULL;
641
642 tree half_type0, half_type1;
643 gimple def_stmt;
644
645 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
646 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
647
648 if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
649 || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
650 return NULL;
651 if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
652 &half_type0, &def_stmt, &promotion)
653 || !promotion)
654 return NULL;
655 sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
656
657 if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
658 &half_type1, &def_stmt, &promotion)
659 || !promotion)
660 return NULL;
661 sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
662
663 if (!types_compatible_p (half_type0, half_type1))
664 return NULL;
665 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
666 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
667 return NULL;
668
669 *type_in = TREE_TYPE (sad_oprnd0);
670 *type_out = sum_type;
671
672 /* Pattern detected. Create a stmt to be used to replace the pattern: */
673 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
0d0e4a03
JJ
674 gimple pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
675 sad_oprnd1, plus_oprnd1);
79d652a5
CH
676
677 if (dump_enabled_p ())
678 {
679 dump_printf_loc (MSG_NOTE, vect_location,
680 "vect_recog_sad_pattern: detected: ");
681 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
682 dump_printf (MSG_NOTE, "\n");
683 }
684
79d652a5
CH
685 return pattern_stmt;
686}
687
688
36ba4aae
IR
689/* Handle widening operation by a constant. At the moment we support MULT_EXPR
690 and LSHIFT_EXPR.
691
692 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
693 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
51312233
IR
694
695 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
36ba4aae
IR
696 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
697 that satisfies the above restrictions, we can perform a widening opeartion
698 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
566d377a 699 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */
51312233
IR
700
701static bool
36ba4aae
IR
702vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
703 tree const_oprnd, tree *oprnd,
566d377a 704 gimple *wstmt, tree type,
36ba4aae 705 tree *half_type, gimple def_stmt)
51312233 706{
83d5977e 707 tree new_type, new_oprnd;
51312233 708
36ba4aae
IR
709 if (code != MULT_EXPR && code != LSHIFT_EXPR)
710 return false;
711
712 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
713 || (code == LSHIFT_EXPR
714 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
715 != 1))
716 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
51312233
IR
717 {
718 /* CONST_OPRND is a constant of HALF_TYPE. */
719 *oprnd = gimple_assign_rhs1 (def_stmt);
720 return true;
721 }
722
f71cf56a
UW
723 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
724 return false;
725
726 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
51312233
IR
727 return false;
728
36ba4aae 729 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
51312233
IR
730 a type 2 times bigger than HALF_TYPE. */
731 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
732 TYPE_UNSIGNED (type));
36ba4aae
IR
733 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
734 || (code == LSHIFT_EXPR
735 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
51312233
IR
736 return false;
737
566d377a
RB
738 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
739 *oprnd = gimple_assign_rhs1 (def_stmt);
740 new_oprnd = make_ssa_name (new_type);
741 *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
742 *oprnd = new_oprnd;
51312233
IR
743
744 *half_type = new_type;
745 return true;
746}
747
748
20f06221
DN
749/* Function vect_recog_widen_mult_pattern
750
751 Try to find the following pattern:
752
d367387c
CH
753 type1 a_t;
754 type2 b_t;
20f06221
DN
755 TYPE a_T, b_T, prod_T;
756
757 S1 a_t = ;
758 S2 b_t = ;
759 S3 a_T = (TYPE) a_t;
760 S4 b_T = (TYPE) b_t;
761 S5 prod_T = a_T * b_T;
762
d367387c 763 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
20f06221 764
d47657bd 765 Also detect unsigned cases:
383d9c83 766
d367387c
CH
767 unsigned type1 a_t;
768 unsigned type2 b_t;
383d9c83
IR
769 unsigned TYPE u_prod_T;
770 TYPE a_T, b_T, prod_T;
771
772 S1 a_t = ;
773 S2 b_t = ;
774 S3 a_T = (TYPE) a_t;
775 S4 b_T = (TYPE) b_t;
776 S5 prod_T = a_T * b_T;
777 S6 u_prod_T = (unsigned TYPE) prod_T;
778
779 and multiplication by constants:
780
781 type a_t;
782 TYPE a_T, prod_T;
783
784 S1 a_t = ;
785 S3 a_T = (TYPE) a_t;
786 S5 prod_T = a_T * CONST;
787
51312233
IR
788 A special case of multiplication by constants is when 'TYPE' is 4 times
789 bigger than 'type', but CONST fits an intermediate type 2 times smaller
790 than 'TYPE'. In that case we create an additional pattern stmt for S3
791 to create a variable of the intermediate type, and perform widen-mult
792 on the intermediate type as well:
793
794 type a_t;
795 interm_type a_it;
796 TYPE a_T, prod_T, prod_T';
797
798 S1 a_t = ;
799 S3 a_T = (TYPE) a_t;
800 '--> a_it = (interm_type) a_t;
801 S5 prod_T = a_T * CONST;
802 '--> prod_T' = a_it w* CONST;
20f06221 803
51312233
IR
804 Input/Output:
805
806 * STMTS: Contains a stmt from which the pattern search begins. In the
807 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
808 is detected. In case of unsigned widen-mult, the original stmt (S5) is
809 replaced with S6 in STMTS. In case of multiplication by a constant
810 of an intermediate type (the last case above), STMTS also contains S3
811 (inserted before S5).
20f06221
DN
812
813 Output:
814
815 * TYPE_IN: The type of the input arguments to the pattern.
816
383d9c83 817 * TYPE_OUT: The type of the output of this pattern.
20f06221
DN
818
819 * Return value: A new stmt that will be used to replace the sequence of
383d9c83 820 stmts that constitute the pattern. In this case it will be:
20f06221 821 WIDEN_MULT <a_t, b_t>
d367387c
CH
822 If the result of WIDEN_MULT needs to be converted to a larger type, the
823 returned stmt will be this type conversion stmt.
20f06221
DN
824*/
825
726a989a 826static gimple
9771b263 827vect_recog_widen_mult_pattern (vec<gimple> *stmts,
51312233 828 tree *type_in, tree *type_out)
20f06221 829{
9771b263 830 gimple last_stmt = stmts->pop ();
726a989a 831 gimple def_stmt0, def_stmt1;
89d67cca
DN
832 tree oprnd0, oprnd1;
833 tree type, half_type0, half_type1;
d367387c
CH
834 gimple new_stmt = NULL, pattern_stmt = NULL;
835 tree vectype, vecitype;
726a989a 836 tree var;
89d67cca 837 enum tree_code dummy_code;
5d593372 838 int dummy_int;
9771b263 839 vec<tree> dummy_vec;
36ba4aae 840 bool op1_ok;
bc4fb355 841 bool promotion;
89d67cca 842
51312233 843 if (!is_gimple_assign (last_stmt))
89d67cca
DN
844 return NULL;
845
51312233 846 type = gimple_expr_type (last_stmt);
89d67cca
DN
847
848 /* Starting from LAST_STMT, follow the defs of its uses in search
849 of the above pattern. */
850
51312233 851 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
89d67cca
DN
852 return NULL;
853
51312233
IR
854 oprnd0 = gimple_assign_rhs1 (last_stmt);
855 oprnd1 = gimple_assign_rhs2 (last_stmt);
9600efe1
MM
856 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
857 || !types_compatible_p (TREE_TYPE (oprnd1), type))
89d67cca
DN
858 return NULL;
859
383d9c83 860 /* Check argument 0. */
bc4fb355
IR
861 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
862 &promotion)
863 || !promotion)
864 return NULL;
383d9c83 865 /* Check argument 1. */
bc4fb355
IR
866 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
867 &def_stmt1, &promotion);
89d67cca 868
bc4fb355 869 if (op1_ok && promotion)
383d9c83
IR
870 {
871 oprnd0 = gimple_assign_rhs1 (def_stmt0);
872 oprnd1 = gimple_assign_rhs1 (def_stmt1);
873 }
36ba4aae 874 else
383d9c83 875 {
51312233 876 if (TREE_CODE (oprnd1) == INTEGER_CST
383d9c83 877 && TREE_CODE (half_type0) == INTEGER_TYPE
36ba4aae 878 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
566d377a 879 &oprnd0, &new_stmt, type,
36ba4aae 880 &half_type0, def_stmt0))
bfdeda2c
JJ
881 {
882 half_type1 = half_type0;
883 oprnd1 = fold_convert (half_type1, oprnd1);
884 }
383d9c83
IR
885 else
886 return NULL;
887 }
888
d367387c
CH
889 /* If the two arguments have different sizes, convert the one with
890 the smaller type into the larger type. */
891 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
892 {
566d377a
RB
893 /* If we already used up the single-stmt slot give up. */
894 if (new_stmt)
895 return NULL;
896
d367387c
CH
897 tree* oprnd = NULL;
898 gimple def_stmt = NULL;
899
900 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
901 {
902 def_stmt = def_stmt0;
903 half_type0 = half_type1;
904 oprnd = &oprnd0;
905 }
906 else
907 {
908 def_stmt = def_stmt1;
909 half_type1 = half_type0;
910 oprnd = &oprnd1;
911 }
912
913 tree old_oprnd = gimple_assign_rhs1 (def_stmt);
b731b390 914 tree new_oprnd = make_ssa_name (half_type0);
0d0e4a03 915 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
d367387c
CH
916 *oprnd = new_oprnd;
917 }
918
383d9c83
IR
919 /* Handle unsigned case. Look for
920 S6 u_prod_T = (unsigned TYPE) prod_T;
921 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
922 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
923 {
9a7a4398
UW
924 gimple use_stmt;
925 tree use_lhs;
383d9c83
IR
926 tree use_type;
927
928 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
929 return NULL;
930
9a7a4398
UW
931 use_stmt = vect_single_imm_use (last_stmt);
932 if (!use_stmt || !is_gimple_assign (use_stmt)
625a9766 933 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
383d9c83
IR
934 return NULL;
935
936 use_lhs = gimple_assign_lhs (use_stmt);
937 use_type = TREE_TYPE (use_lhs);
938 if (!INTEGRAL_TYPE_P (use_type)
939 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
940 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
941 return NULL;
942
943 type = use_type;
51312233 944 last_stmt = use_stmt;
383d9c83 945 }
89d67cca 946
9600efe1 947 if (!types_compatible_p (half_type0, half_type1))
89d67cca
DN
948 return NULL;
949
d367387c
CH
950 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
951 to get an intermediate result of type ITYPE. In this case we need
952 to build a statement to convert this intermediate result to type TYPE. */
953 tree itype = type;
954 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
955 itype = build_nonstandard_integer_type
956 (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
957 TYPE_UNSIGNED (type));
958
89d67cca 959 /* Pattern detected. */
73fbfcad 960 if (dump_enabled_p ())
ccb3ad87 961 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 962 "vect_recog_widen_mult_pattern: detected:\n");
89d67cca
DN
963
964 /* Check target support */
965 vectype = get_vectype_for_scalar_type (half_type0);
d367387c 966 vecitype = get_vectype_for_scalar_type (itype);
03d3e953 967 if (!vectype
d367387c 968 || !vecitype
51312233 969 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
d367387c 970 vecitype, vectype,
a86ec597
RH
971 &dummy_code, &dummy_code,
972 &dummy_int, &dummy_vec))
89d67cca
DN
973 return NULL;
974
975 *type_in = vectype;
d367387c 976 *type_out = get_vectype_for_scalar_type (type);
89d67cca
DN
977
978 /* Pattern supported. Create a stmt to be used to replace the pattern: */
d367387c 979 var = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 980 pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);
726a989a 981
d367387c
CH
982 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
983 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
984 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
985 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
986
987 /* If the original two operands have different sizes, we may need to convert
988 the smaller one into the larget type. If this is the case, at this point
989 the new stmt is already built. */
990 if (new_stmt)
991 {
992 append_pattern_def_seq (stmt_vinfo, new_stmt);
993 stmt_vec_info new_stmt_info
994 = new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo);
995 set_vinfo_for_stmt (new_stmt, new_stmt_info);
996 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
997 }
998
999 /* If ITYPE is not TYPE, we need to build a type convertion stmt to convert
1000 the result of the widen-mult operation into type TYPE. */
1001 if (itype != type)
1002 {
1003 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
1004 stmt_vec_info pattern_stmt_info
1005 = new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo);
1006 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
1007 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
0d0e4a03
JJ
1008 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
1009 NOP_EXPR,
1010 gimple_assign_lhs (pattern_stmt));
d367387c
CH
1011 }
1012
73fbfcad 1013 if (dump_enabled_p ())
78c60e3d 1014 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
726a989a 1015
9771b263 1016 stmts->safe_push (last_stmt);
726a989a 1017 return pattern_stmt;
20f06221
DN
1018}
1019
1020
0b2229b0
RG
1021/* Function vect_recog_pow_pattern
1022
1023 Try to find the following pattern:
1024
1025 x = POW (y, N);
1026
1027 with POW being one of pow, powf, powi, powif and N being
1028 either 2 or 0.5.
1029
1030 Input:
1031
1032 * LAST_STMT: A stmt from which the pattern search begins.
1033
1034 Output:
1035
1036 * TYPE_IN: The type of the input arguments to the pattern.
1037
1038 * TYPE_OUT: The type of the output of this pattern.
1039
1040 * Return value: A new stmt that will be used to replace the sequence of
1041 stmts that constitute the pattern. In this case it will be:
726a989a 1042 x = x * x
0b2229b0 1043 or
726a989a 1044 x = sqrt (x)
0b2229b0
RG
1045*/
1046
726a989a 1047static gimple
9771b263 1048vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
51312233 1049 tree *type_out)
0b2229b0 1050{
9771b263 1051 gimple last_stmt = (*stmts)[0];
726a989a
RB
1052 tree fn, base, exp = NULL;
1053 gimple stmt;
1054 tree var;
0b2229b0 1055
51312233 1056 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
0b2229b0
RG
1057 return NULL;
1058
51312233 1059 fn = gimple_call_fndecl (last_stmt);
52bd463c
RG
1060 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
1061 return NULL;
1062
0b2229b0
RG
1063 switch (DECL_FUNCTION_CODE (fn))
1064 {
1065 case BUILT_IN_POWIF:
1066 case BUILT_IN_POWI:
1067 case BUILT_IN_POWF:
1068 case BUILT_IN_POW:
51312233
IR
1069 base = gimple_call_arg (last_stmt, 0);
1070 exp = gimple_call_arg (last_stmt, 1);
0b2229b0
RG
1071 if (TREE_CODE (exp) != REAL_CST
1072 && TREE_CODE (exp) != INTEGER_CST)
726a989a 1073 return NULL;
0b2229b0
RG
1074 break;
1075
726a989a
RB
1076 default:
1077 return NULL;
0b2229b0
RG
1078 }
1079
1080 /* We now have a pow or powi builtin function call with a constant
1081 exponent. */
1082
0b2229b0
RG
1083 *type_out = NULL_TREE;
1084
1085 /* Catch squaring. */
9541ffee 1086 if ((tree_fits_shwi_p (exp)
9439e9a1 1087 && tree_to_shwi (exp) == 2)
0b2229b0
RG
1088 || (TREE_CODE (exp) == REAL_CST
1089 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
c6b1b49b
RG
1090 {
1091 *type_in = TREE_TYPE (base);
726a989a
RB
1092
1093 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
0d0e4a03 1094 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
726a989a 1095 return stmt;
c6b1b49b 1096 }
0b2229b0
RG
1097
1098 /* Catch square root. */
1099 if (TREE_CODE (exp) == REAL_CST
1100 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
1101 {
1102 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
c6b1b49b
RG
1103 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
1104 if (*type_in)
1105 {
538dd0b7 1106 gcall *stmt = gimple_build_call (newfn, 1, base);
726a989a
RB
1107 if (vectorizable_function (stmt, *type_in, *type_in)
1108 != NULL_TREE)
1109 {
1110 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
b8698a0f 1111 gimple_call_set_lhs (stmt, var);
726a989a
RB
1112 return stmt;
1113 }
c6b1b49b 1114 }
0b2229b0
RG
1115 }
1116
726a989a 1117 return NULL;
0b2229b0
RG
1118}
1119
1120
20f06221
DN
1121/* Function vect_recog_widen_sum_pattern
1122
1123 Try to find the following pattern:
1124
b8698a0f 1125 type x_t;
20f06221
DN
1126 TYPE x_T, sum = init;
1127 loop:
1128 sum_0 = phi <init, sum_1>
1129 S1 x_t = *p;
1130 S2 x_T = (TYPE) x_t;
1131 S3 sum_1 = x_T + sum_0;
1132
b8698a0f 1133 where type 'TYPE' is at least double the size of type 'type', i.e - we're
20f06221 1134 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
917f1b7e 1135 a special case of a reduction computation.
20f06221
DN
1136
1137 Input:
1138
1139 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1140 when this function is called with S3, the pattern {S2,S3} will be detected.
b8698a0f 1141
20f06221 1142 Output:
b8698a0f 1143
20f06221
DN
1144 * TYPE_IN: The type of the input arguments to the pattern.
1145
1146 * TYPE_OUT: The type of the output of this pattern.
1147
1148 * Return value: A new stmt that will be used to replace the sequence of
1149 stmts that constitute the pattern. In this case it will be:
1150 WIDEN_SUM <x_t, sum_0>
d29de1bf 1151
b8698a0f 1152 Note: The widening-sum idiom is a widening reduction pattern that is
d29de1bf 1153 vectorized without preserving all the intermediate results. It
b8698a0f
L
1154 produces only N/2 (widened) results (by summing up pairs of
1155 intermediate results) rather than all N results. Therefore, we
1156 cannot allow this pattern when we want to get all the results and in
1157 the correct order (as is the case when this computation is in an
d29de1bf 1158 inner-loop nested in an outer-loop that us being vectorized). */
20f06221 1159
726a989a 1160static gimple
9771b263 1161vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
51312233 1162 tree *type_out)
20f06221 1163{
9771b263 1164 gimple stmt, last_stmt = (*stmts)[0];
20f06221 1165 tree oprnd0, oprnd1;
51312233 1166 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
20f06221 1167 tree type, half_type;
726a989a 1168 gimple pattern_stmt;
d29de1bf 1169 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 1170 struct loop *loop;
726a989a 1171 tree var;
bc4fb355 1172 bool promotion;
20f06221 1173
f5709183
IR
1174 if (!loop_info)
1175 return NULL;
1176
1177 loop = LOOP_VINFO_LOOP (loop_info);
1178
328dc477
RB
1179 /* We don't allow changing the order of the computation in the inner-loop
1180 when doing outer-loop vectorization. */
1181 if (loop && nested_in_vect_loop_p (loop, last_stmt))
1182 return NULL;
1183
51312233 1184 if (!is_gimple_assign (last_stmt))
20f06221
DN
1185 return NULL;
1186
51312233 1187 type = gimple_expr_type (last_stmt);
20f06221
DN
1188
1189 /* Look for the following pattern
1190 DX = (TYPE) X;
1191 sum_1 = DX + sum_0;
1192 In which DX is at least double the size of X, and sum_1 has been
1193 recognized as a reduction variable.
1194 */
1195
1196 /* Starting from LAST_STMT, follow the defs of its uses in search
1197 of the above pattern. */
1198
51312233 1199 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
20f06221
DN
1200 return NULL;
1201
51312233
IR
1202 oprnd0 = gimple_assign_rhs1 (last_stmt);
1203 oprnd1 = gimple_assign_rhs2 (last_stmt);
9600efe1
MM
1204 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
1205 || !types_compatible_p (TREE_TYPE (oprnd1), type))
20f06221
DN
1206 return NULL;
1207
51312233 1208 /* So far so good. Since last_stmt was detected as a (summation) reduction,
20f06221
DN
1209 we know that oprnd1 is the reduction variable (defined by a loop-header
1210 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1211 Left to check that oprnd0 is defined by a cast from type 'type' to type
1212 'TYPE'. */
1213
bc4fb355
IR
1214 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1215 &promotion)
1216 || !promotion)
1217 return NULL;
20f06221 1218
726a989a 1219 oprnd0 = gimple_assign_rhs1 (stmt);
20f06221
DN
1220 *type_in = half_type;
1221 *type_out = type;
1222
1223 /* Pattern detected. Create a stmt to be used to replace the pattern: */
726a989a 1224 var = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 1225 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);
726a989a 1226
73fbfcad 1227 if (dump_enabled_p ())
20f06221 1228 {
ccb3ad87 1229 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1230 "vect_recog_widen_sum_pattern: detected: ");
ccb3ad87 1231 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 1232 dump_printf (MSG_NOTE, "\n");
20f06221 1233 }
d29de1bf 1234
726a989a 1235 return pattern_stmt;
20f06221
DN
1236}
1237
1238
1107f3ae
IR
1239/* Return TRUE if the operation in STMT can be performed on a smaller type.
1240
1241 Input:
1242 STMT - a statement to check.
1243 DEF - we support operations with two operands, one of which is constant.
1244 The other operand can be defined by a demotion operation, or by a
1245 previous statement in a sequence of over-promoted operations. In the
1246 later case DEF is used to replace that operand. (It is defined by a
1247 pattern statement we created for the previous statement in the
1248 sequence).
1249
1250 Input/output:
1251 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1252 NULL, it's the type of DEF.
1253 STMTS - additional pattern statements. If a pattern statement (type
1254 conversion) is created in this function, its original statement is
1255 added to STMTS.
1256
1257 Output:
1258 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1259 operands to use in the new pattern statement for STMT (will be created
1260 in vect_recog_over_widening_pattern ()).
1261 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1262 statements for STMT: the first one is a type promotion and the second
1263 one is the operation itself. We return the type promotion statement
363477c0 1264 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1107f3ae
IR
1265 the second pattern statement. */
1266
1267static bool
1268vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
1269 tree *op0, tree *op1, gimple *new_def_stmt,
9771b263 1270 vec<gimple> *stmts)
1107f3ae
IR
1271{
1272 enum tree_code code;
1273 tree const_oprnd, oprnd;
83d5977e 1274 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
1107f3ae
IR
1275 gimple def_stmt, new_stmt;
1276 bool first = false;
bc4fb355 1277 bool promotion;
f5709183 1278
d6e1acf6
JJ
1279 *op0 = NULL_TREE;
1280 *op1 = NULL_TREE;
1107f3ae
IR
1281 *new_def_stmt = NULL;
1282
1283 if (!is_gimple_assign (stmt))
1284 return false;
1285
1286 code = gimple_assign_rhs_code (stmt);
1287 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1288 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1289 return false;
1290
1291 oprnd = gimple_assign_rhs1 (stmt);
1292 const_oprnd = gimple_assign_rhs2 (stmt);
1293 type = gimple_expr_type (stmt);
1294
1295 if (TREE_CODE (oprnd) != SSA_NAME
1296 || TREE_CODE (const_oprnd) != INTEGER_CST)
1297 return false;
1298
9ef7adc0
RG
1299 /* If oprnd has other uses besides that in stmt we cannot mark it
1300 as being part of a pattern only. */
1301 if (!has_single_use (oprnd))
1302 return false;
1303
1107f3ae
IR
1304 /* If we are in the middle of a sequence, we use DEF from a previous
1305 statement. Otherwise, OPRND has to be a result of type promotion. */
1306 if (*new_type)
1307 {
1308 half_type = *new_type;
1309 oprnd = def;
1310 }
1311 else
1312 {
1313 first = true;
bc4fb355 1314 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
f71cf56a
UW
1315 &promotion)
1316 || !promotion
1317 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1107f3ae
IR
1318 return false;
1319 }
1320
1321 /* Can we perform the operation on a smaller type? */
1322 switch (code)
1323 {
1324 case BIT_IOR_EXPR:
1325 case BIT_XOR_EXPR:
1326 case BIT_AND_EXPR:
1327 if (!int_fits_type_p (const_oprnd, half_type))
1328 {
1329 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1330 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1331 return false;
1332
1333 interm_type = build_nonstandard_integer_type (
1334 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1335 if (!int_fits_type_p (const_oprnd, interm_type))
1336 return false;
1337 }
1338
1339 break;
1340
1341 case LSHIFT_EXPR:
1342 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1343 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1344 return false;
1345
1346 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1347 (e.g., if the original value was char, the shift amount is at most 8
1348 if we want to use short). */
1349 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1350 return false;
1351
1352 interm_type = build_nonstandard_integer_type (
1353 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1354
1355 if (!vect_supportable_shift (code, interm_type))
1356 return false;
1357
1358 break;
1359
1360 case RSHIFT_EXPR:
1361 if (vect_supportable_shift (code, half_type))
1362 break;
1363
1364 /* Try intermediate type - HALF_TYPE is not supported. */
1365 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1366 return false;
1367
1368 interm_type = build_nonstandard_integer_type (
1369 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1370
1371 if (!vect_supportable_shift (code, interm_type))
1372 return false;
1373
1374 break;
1375
1376 default:
1377 gcc_unreachable ();
1378 }
1379
1380 /* There are four possible cases:
1381 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1382 the first statement in the sequence)
1383 a. The original, HALF_TYPE, is not enough - we replace the promotion
1384 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1385 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1386 promotion.
1387 2. OPRND is defined by a pattern statement we created.
1388 a. Its type is not sufficient for the operation, we create a new stmt:
1389 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1390 this statement in NEW_DEF_STMT, and it is later put in
363477c0 1391 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1107f3ae
IR
1392 b. OPRND is good to use in the new statement. */
1393 if (first)
1394 {
1395 if (interm_type)
1396 {
1397 /* Replace the original type conversion HALF_TYPE->TYPE with
1398 HALF_TYPE->INTERM_TYPE. */
1399 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1400 {
1401 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1402 /* Check if the already created pattern stmt is what we need. */
1403 if (!is_gimple_assign (new_stmt)
625a9766 1404 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
1107f3ae
IR
1405 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1406 return false;
1407
9771b263 1408 stmts->safe_push (def_stmt);
1107f3ae
IR
1409 oprnd = gimple_assign_lhs (new_stmt);
1410 }
1411 else
1412 {
1413 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1414 oprnd = gimple_assign_rhs1 (def_stmt);
b731b390 1415 new_oprnd = make_ssa_name (interm_type);
0d0e4a03 1416 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1107f3ae 1417 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
9771b263 1418 stmts->safe_push (def_stmt);
1107f3ae
IR
1419 oprnd = new_oprnd;
1420 }
1421 }
1422 else
1423 {
1424 /* Retrieve the operand before the type promotion. */
1425 oprnd = gimple_assign_rhs1 (def_stmt);
1426 }
1427 }
1428 else
1429 {
1430 if (interm_type)
1431 {
1432 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
b731b390 1433 new_oprnd = make_ssa_name (interm_type);
0d0e4a03 1434 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1107f3ae
IR
1435 oprnd = new_oprnd;
1436 *new_def_stmt = new_stmt;
1437 }
1438
1439 /* Otherwise, OPRND is already set. */
1440 }
1441
1442 if (interm_type)
1443 *new_type = interm_type;
1444 else
1445 *new_type = half_type;
1446
1447 *op0 = oprnd;
1448 *op1 = fold_convert (*new_type, const_oprnd);
1449
1450 return true;
1451}
1452
1453
1454/* Try to find a statement or a sequence of statements that can be performed
1455 on a smaller type:
1456
1457 type x_t;
1458 TYPE x_T, res0_T, res1_T;
1459 loop:
1460 S1 x_t = *p;
1461 S2 x_T = (TYPE) x_t;
1462 S3 res0_T = op (x_T, C0);
1463 S4 res1_T = op (res0_T, C1);
1464 S5 ... = () res1_T; - type demotion
1465
1466 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1467 constants.
1468 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1469 be 'type' or some intermediate type. For now, we expect S5 to be a type
71c92d17 1470 demotion operation. We also check that S3 and S4 have only one use. */
1107f3ae 1471
1107f3ae 1472static gimple
9771b263 1473vect_recog_over_widening_pattern (vec<gimple> *stmts,
1107f3ae
IR
1474 tree *type_in, tree *type_out)
1475{
9771b263 1476 gimple stmt = stmts->pop ();
1107f3ae 1477 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
9a7a4398 1478 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
83d5977e 1479 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1107f3ae 1480 bool first;
b2a1a74d 1481 tree type = NULL;
1107f3ae
IR
1482
1483 first = true;
1484 while (1)
1485 {
1486 if (!vinfo_for_stmt (stmt)
1487 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1488 return NULL;
1489
1490 new_def_stmt = NULL;
1491 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1492 &op0, &op1, &new_def_stmt,
1493 stmts))
1494 {
1495 if (first)
1496 return NULL;
1497 else
1498 break;
1499 }
1500
1501 /* STMT can be performed on a smaller type. Check its uses. */
9a7a4398
UW
1502 use_stmt = vect_single_imm_use (stmt);
1503 if (!use_stmt || !is_gimple_assign (use_stmt))
1107f3ae
IR
1504 return NULL;
1505
1506 /* Create pattern statement for STMT. */
1507 vectype = get_vectype_for_scalar_type (new_type);
1508 if (!vectype)
1509 return NULL;
1510
1511 /* We want to collect all the statements for which we create pattern
1512 statetments, except for the case when the last statement in the
1513 sequence doesn't have a corresponding pattern statement. In such
1514 case we associate the last pattern statement with the last statement
36ba4aae 1515 in the sequence. Therefore, we only add the original statement to
1107f3ae
IR
1516 the list if we know that it is not the last. */
1517 if (prev_stmt)
9771b263 1518 stmts->safe_push (prev_stmt);
1107f3ae
IR
1519
1520 var = vect_recog_temp_ssa_var (new_type, NULL);
62371b92 1521 pattern_stmt
0d0e4a03 1522 = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1107f3ae 1523 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
083481d8 1524 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1107f3ae 1525
73fbfcad 1526 if (dump_enabled_p ())
1107f3ae 1527 {
ccb3ad87 1528 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1529 "created pattern stmt: ");
ccb3ad87 1530 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 1531 dump_printf (MSG_NOTE, "\n");
1107f3ae
IR
1532 }
1533
b2a1a74d 1534 type = gimple_expr_type (stmt);
1107f3ae
IR
1535 prev_stmt = stmt;
1536 stmt = use_stmt;
1537
1538 first = false;
1539 }
1540
1541 /* We got a sequence. We expect it to end with a type demotion operation.
1542 Otherwise, we quit (for now). There are three possible cases: the
1543 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1544 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1545 NEW_TYPE differs (we create a new conversion statement). */
1546 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1547 {
1548 use_lhs = gimple_assign_lhs (use_stmt);
1549 use_type = TREE_TYPE (use_lhs);
82db3d43 1550 /* Support only type demotion or signedess change. */
1107f3ae 1551 if (!INTEGRAL_TYPE_P (use_type)
82db3d43 1552 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1107f3ae
IR
1553 return NULL;
1554
82db3d43
IR
1555 /* Check that NEW_TYPE is not bigger than the conversion result. */
1556 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1557 return NULL;
1558
1107f3ae
IR
1559 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1560 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1561 {
1562 /* Create NEW_TYPE->USE_TYPE conversion. */
b731b390 1563 new_oprnd = make_ssa_name (use_type);
0d0e4a03 1564 pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1107f3ae
IR
1565 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1566
1567 *type_in = get_vectype_for_scalar_type (new_type);
1568 *type_out = get_vectype_for_scalar_type (use_type);
1569
1570 /* We created a pattern statement for the last statement in the
1571 sequence, so we don't need to associate it with the pattern
1572 statement created for PREV_STMT. Therefore, we add PREV_STMT
1573 to the list in order to mark it later in vect_pattern_recog_1. */
1574 if (prev_stmt)
9771b263 1575 stmts->safe_push (prev_stmt);
1107f3ae
IR
1576 }
1577 else
1578 {
1579 if (prev_stmt)
363477c0
JJ
1580 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1581 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1107f3ae
IR
1582
1583 *type_in = vectype;
1584 *type_out = NULL_TREE;
1585 }
1586
9771b263 1587 stmts->safe_push (use_stmt);
1107f3ae
IR
1588 }
1589 else
1590 /* TODO: support general case, create a conversion to the correct type. */
1591 return NULL;
1592
1593 /* Pattern detected. */
73fbfcad 1594 if (dump_enabled_p ())
1107f3ae 1595 {
ccb3ad87 1596 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1597 "vect_recog_over_widening_pattern: detected: ");
ccb3ad87 1598 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 1599 dump_printf (MSG_NOTE, "\n");
1107f3ae
IR
1600 }
1601
1602 return pattern_stmt;
1603}
1604
36ba4aae
IR
1605/* Detect widening shift pattern:
1606
1607 type a_t;
1608 TYPE a_T, res_T;
1609
1610 S1 a_t = ;
1611 S2 a_T = (TYPE) a_t;
1612 S3 res_T = a_T << CONST;
1613
1614 where type 'TYPE' is at least double the size of type 'type'.
1615
33018845
UW
1616 Also detect cases where the shift result is immediately converted
1617 to another type 'result_type' that is no larger in size than 'TYPE'.
1618 In those cases we perform a widen-shift that directly results in
1619 'result_type', to avoid a possible over-widening situation:
36ba4aae 1620
33018845 1621 type a_t;
36ba4aae 1622 TYPE a_T, res_T;
33018845 1623 result_type res_result;
36ba4aae
IR
1624
1625 S1 a_t = ;
1626 S2 a_T = (TYPE) a_t;
1627 S3 res_T = a_T << CONST;
33018845
UW
1628 S4 res_result = (result_type) res_T;
1629 '--> res_result' = a_t w<< CONST;
36ba4aae
IR
1630
1631 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1632 create an additional pattern stmt for S2 to create a variable of an
1633 intermediate type, and perform widen-shift on the intermediate type:
1634
1635 type a_t;
1636 interm_type a_it;
1637 TYPE a_T, res_T, res_T';
1638
1639 S1 a_t = ;
1640 S2 a_T = (TYPE) a_t;
1641 '--> a_it = (interm_type) a_t;
1642 S3 res_T = a_T << CONST;
1643 '--> res_T' = a_it <<* CONST;
1644
1645 Input/Output:
1646
1647 * STMTS: Contains a stmt from which the pattern search begins.
1648 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1649 in STMTS. When an intermediate type is used and a pattern statement is
1650 created for S2, we also put S2 here (before S3).
1651
1652 Output:
1653
1654 * TYPE_IN: The type of the input arguments to the pattern.
1655
1656 * TYPE_OUT: The type of the output of this pattern.
1657
1658 * Return value: A new stmt that will be used to replace the sequence of
1659 stmts that constitute the pattern. In this case it will be:
1660 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1661
1662static gimple
9771b263 1663vect_recog_widen_shift_pattern (vec<gimple> *stmts,
36ba4aae
IR
1664 tree *type_in, tree *type_out)
1665{
9771b263 1666 gimple last_stmt = stmts->pop ();
36ba4aae
IR
1667 gimple def_stmt0;
1668 tree oprnd0, oprnd1;
1669 tree type, half_type0;
33018845 1670 gimple pattern_stmt;
36ba4aae 1671 tree vectype, vectype_out = NULL_TREE;
36ba4aae
IR
1672 tree var;
1673 enum tree_code dummy_code;
1674 int dummy_int;
9771b263 1675 vec<tree> dummy_vec;
33018845 1676 gimple use_stmt;
bc4fb355 1677 bool promotion;
36ba4aae
IR
1678
1679 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1680 return NULL;
1681
36ba4aae 1682 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
33018845 1683 return NULL;
36ba4aae
IR
1684
1685 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1686 return NULL;
1687
1688 oprnd0 = gimple_assign_rhs1 (last_stmt);
1689 oprnd1 = gimple_assign_rhs2 (last_stmt);
1690 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1691 return NULL;
1692
1693 /* Check operand 0: it has to be defined by a type promotion. */
bc4fb355 1694 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
566d377a 1695 &promotion)
bc4fb355
IR
1696 || !promotion)
1697 return NULL;
36ba4aae
IR
1698
1699 /* Check operand 1: has to be positive. We check that it fits the type
1700 in vect_handle_widen_op_by_const (). */
1701 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1702 return NULL;
1703
1704 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1705 type = gimple_expr_type (last_stmt);
1706
33018845
UW
1707 /* Check for subsequent conversion to another type. */
1708 use_stmt = vect_single_imm_use (last_stmt);
1709 if (use_stmt && is_gimple_assign (use_stmt)
1710 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1711 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1712 {
1713 tree use_lhs = gimple_assign_lhs (use_stmt);
1714 tree use_type = TREE_TYPE (use_lhs);
1715
1716 if (INTEGRAL_TYPE_P (use_type)
1717 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1718 {
1719 last_stmt = use_stmt;
1720 type = use_type;
1721 }
1722 }
1723
36ba4aae 1724 /* Check if this a widening operation. */
566d377a 1725 gimple wstmt = NULL;
36ba4aae 1726 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
566d377a 1727 &oprnd0, &wstmt,
36ba4aae
IR
1728 type, &half_type0, def_stmt0))
1729 return NULL;
1730
36ba4aae 1731 /* Pattern detected. */
73fbfcad 1732 if (dump_enabled_p ())
ccb3ad87 1733 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1734 "vect_recog_widen_shift_pattern: detected:\n");
36ba4aae
IR
1735
1736 /* Check target support. */
1737 vectype = get_vectype_for_scalar_type (half_type0);
1738 vectype_out = get_vectype_for_scalar_type (type);
1739
1740 if (!vectype
1741 || !vectype_out
1742 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1743 vectype_out, vectype,
a86ec597
RH
1744 &dummy_code, &dummy_code,
1745 &dummy_int, &dummy_vec))
36ba4aae
IR
1746 return NULL;
1747
1748 *type_in = vectype;
1749 *type_out = vectype_out;
1750
1751 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1752 var = vect_recog_temp_ssa_var (type, NULL);
1753 pattern_stmt =
0d0e4a03 1754 gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
566d377a
RB
1755 if (wstmt)
1756 {
1757 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1758 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1759 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1760 new_pattern_def_seq (stmt_vinfo, wstmt);
1761 stmt_vec_info new_stmt_info
1762 = new_stmt_vec_info (wstmt, loop_vinfo, bb_vinfo);
1763 set_vinfo_for_stmt (wstmt, new_stmt_info);
1764 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1765 }
36ba4aae 1766
73fbfcad 1767 if (dump_enabled_p ())
78c60e3d 1768 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
7e9a3abb
JJ
1769
1770 stmts->safe_push (last_stmt);
1771 return pattern_stmt;
1772}
1773
1774/* Detect a rotate pattern wouldn't be otherwise vectorized:
1775
1776 type a_t, b_t, c_t;
1777
1778 S0 a_t = b_t r<< c_t;
1779
1780 Input/Output:
1781
1782 * STMTS: Contains a stmt from which the pattern search begins,
1783 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1784 with a sequence:
1785
1786 S1 d_t = -c_t;
1787 S2 e_t = d_t & (B - 1);
1788 S3 f_t = b_t << c_t;
1789 S4 g_t = b_t >> e_t;
1790 S0 a_t = f_t | g_t;
1791
1792 where B is element bitsize of type.
1793
1794 Output:
1795
1796 * TYPE_IN: The type of the input arguments to the pattern.
1797
1798 * TYPE_OUT: The type of the output of this pattern.
1799
1800 * Return value: A new stmt that will be used to replace the rotate
1801 S0 stmt. */
1802
1803static gimple
1804vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
1805{
1806 gimple last_stmt = stmts->pop ();
1807 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1808 gimple pattern_stmt, def_stmt;
1809 enum tree_code rhs_code;
1810 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1811 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1812 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1813 enum vect_def_type dt;
1814 optab optab1, optab2;
68119618 1815 edge ext_def = NULL;
7e9a3abb
JJ
1816
1817 if (!is_gimple_assign (last_stmt))
1818 return NULL;
1819
1820 rhs_code = gimple_assign_rhs_code (last_stmt);
1821 switch (rhs_code)
1822 {
1823 case LROTATE_EXPR:
1824 case RROTATE_EXPR:
1825 break;
1826 default:
1827 return NULL;
1828 }
1829
1830 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1831 return NULL;
1832
1833 lhs = gimple_assign_lhs (last_stmt);
1834 oprnd0 = gimple_assign_rhs1 (last_stmt);
1835 type = TREE_TYPE (oprnd0);
1836 oprnd1 = gimple_assign_rhs2 (last_stmt);
1837 if (TREE_CODE (oprnd0) != SSA_NAME
1838 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1839 || !INTEGRAL_TYPE_P (type)
1840 || !TYPE_UNSIGNED (type))
1841 return NULL;
1842
1843 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1844 &def, &dt))
1845 return NULL;
1846
1847 if (dt != vect_internal_def
1848 && dt != vect_constant_def
1849 && dt != vect_external_def)
1850 return NULL;
1851
1852 vectype = get_vectype_for_scalar_type (type);
1853 if (vectype == NULL_TREE)
1854 return NULL;
1855
1856 /* If vector/vector or vector/scalar rotate is supported by the target,
1857 don't do anything here. */
1858 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1859 if (optab1
1860 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1861 return NULL;
1862
1863 if (bb_vinfo != NULL || dt != vect_internal_def)
1864 {
1865 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1866 if (optab2
1867 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1868 return NULL;
1869 }
1870
1871 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1872 don't do anything here either. */
1873 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1874 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1875 if (!optab1
1876 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1877 || !optab2
1878 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1879 {
1880 if (bb_vinfo == NULL && dt == vect_internal_def)
1881 return NULL;
1882 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1883 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1884 if (!optab1
1885 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1886 || !optab2
1887 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1888 return NULL;
1889 }
1890
1891 *type_in = vectype;
1892 *type_out = vectype;
1893 if (*type_in == NULL_TREE)
1894 return NULL;
1895
68119618
JJ
1896 if (dt == vect_external_def
1897 && TREE_CODE (oprnd1) == SSA_NAME
1898 && loop_vinfo)
1899 {
1900 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1901 ext_def = loop_preheader_edge (loop);
1902 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1903 {
1904 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1905 if (bb == NULL
1906 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1907 ext_def = NULL;
1908 }
1909 }
1910
7e9a3abb
JJ
1911 def = NULL_TREE;
1912 if (TREE_CODE (oprnd1) == INTEGER_CST
1913 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1914 def = oprnd1;
1915 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1916 {
1917 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1918 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1919 && TYPE_PRECISION (TREE_TYPE (rhs1))
1920 == TYPE_PRECISION (type))
1921 def = rhs1;
1922 }
1923
1924 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1925 if (def == NULL_TREE)
1926 {
1927 def = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 1928 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
68119618
JJ
1929 if (ext_def)
1930 {
1931 basic_block new_bb
1932 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1933 gcc_assert (!new_bb);
1934 }
1935 else
1936 append_pattern_def_seq (stmt_vinfo, def_stmt);
7e9a3abb
JJ
1937 }
1938 stype = TREE_TYPE (def);
1939
1940 if (TREE_CODE (def) == INTEGER_CST)
1941 {
cc269bb6 1942 if (!tree_fits_uhwi_p (def)
7d362f6c 1943 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
7e9a3abb
JJ
1944 || integer_zerop (def))
1945 return NULL;
1946 def2 = build_int_cst (stype,
1947 GET_MODE_PRECISION (TYPE_MODE (type))
ae7e9ddd 1948 - tree_to_uhwi (def));
7e9a3abb
JJ
1949 }
1950 else
1951 {
1952 tree vecstype = get_vectype_for_scalar_type (stype);
1953 stmt_vec_info def_stmt_vinfo;
1954
1955 if (vecstype == NULL_TREE)
1956 return NULL;
1957 def2 = vect_recog_temp_ssa_var (stype, NULL);
0d0e4a03 1958 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
68119618
JJ
1959 if (ext_def)
1960 {
1961 basic_block new_bb
1962 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1963 gcc_assert (!new_bb);
1964 }
1965 else
1966 {
1967 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1968 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1969 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1970 append_pattern_def_seq (stmt_vinfo, def_stmt);
1971 }
7e9a3abb
JJ
1972
1973 def2 = vect_recog_temp_ssa_var (stype, NULL);
1974 tree mask
1975 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
0d0e4a03
JJ
1976 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
1977 gimple_assign_lhs (def_stmt), mask);
68119618
JJ
1978 if (ext_def)
1979 {
1980 basic_block new_bb
1981 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1982 gcc_assert (!new_bb);
1983 }
1984 else
1985 {
1986 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1987 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1988 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1989 append_pattern_def_seq (stmt_vinfo, def_stmt);
1990 }
7e9a3abb
JJ
1991 }
1992
1993 var1 = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
1994 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
1995 ? LSHIFT_EXPR : RSHIFT_EXPR,
1996 oprnd0, def);
7e9a3abb
JJ
1997 append_pattern_def_seq (stmt_vinfo, def_stmt);
1998
1999 var2 = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
2000 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2001 ? RSHIFT_EXPR : LSHIFT_EXPR,
2002 oprnd0, def2);
7e9a3abb
JJ
2003 append_pattern_def_seq (stmt_vinfo, def_stmt);
2004
2005 /* Pattern detected. */
2006 if (dump_enabled_p ())
2007 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2008 "vect_recog_rotate_pattern: detected:\n");
7e9a3abb
JJ
2009
2010 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2011 var = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 2012 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
7e9a3abb
JJ
2013
2014 if (dump_enabled_p ())
2015 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
36ba4aae 2016
9771b263 2017 stmts->safe_push (last_stmt);
36ba4aae
IR
2018 return pattern_stmt;
2019}
1107f3ae 2020
732a0ad3
JJ
2021/* Detect a vector by vector shift pattern that wouldn't be otherwise
2022 vectorized:
2023
2024 type a_t;
2025 TYPE b_T, res_T;
2026
2027 S1 a_t = ;
2028 S2 b_T = ;
2029 S3 res_T = b_T op a_t;
2030
2031 where type 'TYPE' is a type with different size than 'type',
2032 and op is <<, >> or rotate.
2033
2034 Also detect cases:
2035
2036 type a_t;
2037 TYPE b_T, c_T, res_T;
2038
2039 S0 c_T = ;
2040 S1 a_t = (type) c_T;
2041 S2 b_T = ;
2042 S3 res_T = b_T op a_t;
2043
2044 Input/Output:
2045
2046 * STMTS: Contains a stmt from which the pattern search begins,
2047 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2048 with a shift/rotate which has same type on both operands, in the
2049 second case just b_T op c_T, in the first case with added cast
363477c0 2050 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
732a0ad3
JJ
2051
2052 Output:
2053
2054 * TYPE_IN: The type of the input arguments to the pattern.
2055
2056 * TYPE_OUT: The type of the output of this pattern.
2057
2058 * Return value: A new stmt that will be used to replace the shift/rotate
2059 S3 stmt. */
2060
2061static gimple
9771b263 2062vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
732a0ad3
JJ
2063 tree *type_in, tree *type_out)
2064{
9771b263 2065 gimple last_stmt = stmts->pop ();
732a0ad3
JJ
2066 tree oprnd0, oprnd1, lhs, var;
2067 gimple pattern_stmt, def_stmt;
2068 enum tree_code rhs_code;
2069 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2070 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 2071 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
732a0ad3
JJ
2072 enum vect_def_type dt;
2073 tree def;
2074
2075 if (!is_gimple_assign (last_stmt))
2076 return NULL;
2077
2078 rhs_code = gimple_assign_rhs_code (last_stmt);
2079 switch (rhs_code)
2080 {
2081 case LSHIFT_EXPR:
2082 case RSHIFT_EXPR:
2083 case LROTATE_EXPR:
2084 case RROTATE_EXPR:
2085 break;
2086 default:
2087 return NULL;
2088 }
2089
2090 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2091 return NULL;
2092
2093 lhs = gimple_assign_lhs (last_stmt);
2094 oprnd0 = gimple_assign_rhs1 (last_stmt);
2095 oprnd1 = gimple_assign_rhs2 (last_stmt);
2096 if (TREE_CODE (oprnd0) != SSA_NAME
2097 || TREE_CODE (oprnd1) != SSA_NAME
2098 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2099 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2100 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2101 || TYPE_PRECISION (TREE_TYPE (lhs))
2102 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2103 return NULL;
2104
f5709183 2105 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
24ee1384 2106 &def, &dt))
732a0ad3
JJ
2107 return NULL;
2108
2109 if (dt != vect_internal_def)
2110 return NULL;
2111
2112 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2113 *type_out = *type_in;
2114 if (*type_in == NULL_TREE)
2115 return NULL;
2116
2117 def = NULL_TREE;
2118 if (gimple_assign_cast_p (def_stmt))
2119 {
2120 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2121 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2122 && TYPE_PRECISION (TREE_TYPE (rhs1))
2123 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2124 def = rhs1;
2125 }
2126
2127 if (def == NULL_TREE)
2128 {
2129 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
0d0e4a03 2130 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
083481d8 2131 new_pattern_def_seq (stmt_vinfo, def_stmt);
732a0ad3
JJ
2132 }
2133
2134 /* Pattern detected. */
73fbfcad 2135 if (dump_enabled_p ())
ccb3ad87 2136 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2137 "vect_recog_vector_vector_shift_pattern: detected:\n");
732a0ad3
JJ
2138
2139 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2140 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
0d0e4a03 2141 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
732a0ad3 2142
73fbfcad 2143 if (dump_enabled_p ())
78c60e3d 2144 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
732a0ad3 2145
9771b263 2146 stmts->safe_push (last_stmt);
732a0ad3
JJ
2147 return pattern_stmt;
2148}
2149
079c527f 2150/* Detect a signed division by a constant that wouldn't be
363477c0
JJ
2151 otherwise vectorized:
2152
2153 type a_t, b_t;
2154
2155 S1 a_t = b_t / N;
2156
079c527f 2157 where type 'type' is an integral type and N is a constant.
363477c0 2158
079c527f 2159 Similarly handle modulo by a constant:
363477c0
JJ
2160
2161 S4 a_t = b_t % N;
2162
2163 Input/Output:
2164
2165 * STMTS: Contains a stmt from which the pattern search begins,
079c527f
JJ
2166 i.e. the division stmt. S1 is replaced by if N is a power
2167 of two constant and type is signed:
363477c0
JJ
2168 S3 y_t = b_t < 0 ? N - 1 : 0;
2169 S2 x_t = b_t + y_t;
2170 S1' a_t = x_t >> log2 (N);
2171
079c527f
JJ
2172 S4 is replaced if N is a power of two constant and
2173 type is signed by (where *_T temporaries have unsigned type):
363477c0
JJ
2174 S9 y_T = b_t < 0 ? -1U : 0U;
2175 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2176 S7 z_t = (type) z_T;
2177 S6 w_t = b_t + z_t;
2178 S5 x_t = w_t & (N - 1);
2179 S4' a_t = x_t - z_t;
2180
2181 Output:
2182
2183 * TYPE_IN: The type of the input arguments to the pattern.
2184
2185 * TYPE_OUT: The type of the output of this pattern.
2186
2187 * Return value: A new stmt that will be used to replace the division
2188 S1 or modulo S4 stmt. */
2189
2190static gimple
9771b263 2191vect_recog_divmod_pattern (vec<gimple> *stmts,
079c527f 2192 tree *type_in, tree *type_out)
363477c0 2193{
9771b263 2194 gimple last_stmt = stmts->pop ();
5deb57cb 2195 tree oprnd0, oprnd1, vectype, itype, cond;
363477c0
JJ
2196 gimple pattern_stmt, def_stmt;
2197 enum tree_code rhs_code;
2198 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2199 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
079c527f 2200 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
363477c0 2201 optab optab;
00f07b86 2202 tree q;
079c527f 2203 int dummy_int, prec;
079c527f 2204 stmt_vec_info def_stmt_vinfo;
363477c0
JJ
2205
2206 if (!is_gimple_assign (last_stmt))
2207 return NULL;
2208
2209 rhs_code = gimple_assign_rhs_code (last_stmt);
2210 switch (rhs_code)
2211 {
2212 case TRUNC_DIV_EXPR:
2213 case TRUNC_MOD_EXPR:
2214 break;
2215 default:
2216 return NULL;
2217 }
2218
2219 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2220 return NULL;
2221
2222 oprnd0 = gimple_assign_rhs1 (last_stmt);
2223 oprnd1 = gimple_assign_rhs2 (last_stmt);
2224 itype = TREE_TYPE (oprnd0);
2225 if (TREE_CODE (oprnd0) != SSA_NAME
2226 || TREE_CODE (oprnd1) != INTEGER_CST
2227 || TREE_CODE (itype) != INTEGER_TYPE
079c527f 2228 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
363477c0
JJ
2229 return NULL;
2230
2231 vectype = get_vectype_for_scalar_type (itype);
2232 if (vectype == NULL_TREE)
2233 return NULL;
2234
2235 /* If the target can handle vectorized division or modulo natively,
2236 don't attempt to optimize this. */
2237 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2225b9f2 2238 if (optab != unknown_optab)
363477c0 2239 {
ef4bddc2 2240 machine_mode vec_mode = TYPE_MODE (vectype);
363477c0 2241 int icode = (int) optab_handler (optab, vec_mode);
e6d4f8f5 2242 if (icode != CODE_FOR_nothing)
363477c0
JJ
2243 return NULL;
2244 }
2245
079c527f
JJ
2246 prec = TYPE_PRECISION (itype);
2247 if (integer_pow2p (oprnd1))
363477c0 2248 {
079c527f
JJ
2249 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2250 return NULL;
363477c0 2251
079c527f 2252 /* Pattern detected. */
73fbfcad 2253 if (dump_enabled_p ())
ccb3ad87 2254 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2255 "vect_recog_divmod_pattern: detected:\n");
079c527f
JJ
2256
2257 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2258 build_int_cst (itype, 0));
2259 if (rhs_code == TRUNC_DIV_EXPR)
2260 {
2261 tree var = vect_recog_temp_ssa_var (itype, NULL);
2262 tree shift;
2263 def_stmt
0d0e4a03
JJ
2264 = gimple_build_assign (var, COND_EXPR, cond,
2265 fold_build2 (MINUS_EXPR, itype, oprnd1,
2266 build_int_cst (itype, 1)),
2267 build_int_cst (itype, 0));
079c527f
JJ
2268 new_pattern_def_seq (stmt_vinfo, def_stmt);
2269 var = vect_recog_temp_ssa_var (itype, NULL);
2270 def_stmt
0d0e4a03
JJ
2271 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2272 gimple_assign_lhs (def_stmt));
079c527f
JJ
2273 append_pattern_def_seq (stmt_vinfo, def_stmt);
2274
2275 shift = build_int_cst (itype, tree_log2 (oprnd1));
2276 pattern_stmt
0d0e4a03
JJ
2277 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2278 RSHIFT_EXPR, var, shift);
079c527f
JJ
2279 }
2280 else
2281 {
2282 tree signmask;
2283 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2284 if (compare_tree_int (oprnd1, 2) == 0)
2285 {
2286 signmask = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2287 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2288 build_int_cst (itype, 1),
2289 build_int_cst (itype, 0));
079c527f
JJ
2290 append_pattern_def_seq (stmt_vinfo, def_stmt);
2291 }
2292 else
2293 {
2294 tree utype
2295 = build_nonstandard_integer_type (prec, 1);
2296 tree vecutype = get_vectype_for_scalar_type (utype);
2297 tree shift
2298 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2299 - tree_log2 (oprnd1));
2300 tree var = vect_recog_temp_ssa_var (utype, NULL);
2301
0d0e4a03
JJ
2302 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2303 build_int_cst (utype, -1),
2304 build_int_cst (utype, 0));
079c527f
JJ
2305 def_stmt_vinfo
2306 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2307 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2308 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2309 append_pattern_def_seq (stmt_vinfo, def_stmt);
2310 var = vect_recog_temp_ssa_var (utype, NULL);
0d0e4a03
JJ
2311 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2312 gimple_assign_lhs (def_stmt),
2313 shift);
079c527f
JJ
2314 def_stmt_vinfo
2315 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2316 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2317 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2318 append_pattern_def_seq (stmt_vinfo, def_stmt);
2319 signmask = vect_recog_temp_ssa_var (itype, NULL);
2320 def_stmt
0d0e4a03 2321 = gimple_build_assign (signmask, NOP_EXPR, var);
079c527f
JJ
2322 append_pattern_def_seq (stmt_vinfo, def_stmt);
2323 }
2324 def_stmt
0d0e4a03
JJ
2325 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2326 PLUS_EXPR, oprnd0, signmask);
079c527f
JJ
2327 append_pattern_def_seq (stmt_vinfo, def_stmt);
2328 def_stmt
0d0e4a03
JJ
2329 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2330 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2331 fold_build2 (MINUS_EXPR, itype, oprnd1,
2332 build_int_cst (itype, 1)));
079c527f
JJ
2333 append_pattern_def_seq (stmt_vinfo, def_stmt);
2334
2335 pattern_stmt
0d0e4a03
JJ
2336 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2337 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2338 signmask);
079c527f
JJ
2339 }
2340
73fbfcad 2341 if (dump_enabled_p ())
78c60e3d
SS
2342 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2343 0);
079c527f 2344
9771b263 2345 stmts->safe_push (last_stmt);
079c527f
JJ
2346
2347 *type_in = vectype;
2348 *type_out = vectype;
2349 return pattern_stmt;
363477c0 2350 }
079c527f 2351
6b58915b
RS
2352 if (prec > HOST_BITS_PER_WIDE_INT
2353 || integer_zerop (oprnd1))
079c527f
JJ
2354 return NULL;
2355
00f07b86
RH
2356 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2357 return NULL;
079c527f
JJ
2358
2359 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2360
2361 if (TYPE_UNSIGNED (itype))
363477c0 2362 {
079c527f
JJ
2363 unsigned HOST_WIDE_INT mh, ml;
2364 int pre_shift, post_shift;
6b58915b
RS
2365 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2366 & GET_MODE_MASK (TYPE_MODE (itype)));
5deb57cb 2367 tree t1, t2, t3, t4;
079c527f
JJ
2368
2369 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2370 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2371 return NULL;
2372
2373 /* Find a suitable multiplier and right shift count
2374 instead of multiplying with D. */
2375 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2376
2377 /* If the suggested multiplier is more than SIZE bits, we can do better
2378 for even divisors, using an initial right shift. */
2379 if (mh != 0 && (d & 1) == 0)
363477c0 2380 {
079c527f
JJ
2381 pre_shift = floor_log2 (d & -d);
2382 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2383 &ml, &post_shift, &dummy_int);
2384 gcc_assert (!mh);
2385 }
2386 else
2387 pre_shift = 0;
2388
2389 if (mh != 0)
2390 {
2391 if (post_shift - 1 >= prec)
2392 return NULL;
2393
5deb57cb
JJ
2394 /* t1 = oprnd0 h* ml;
2395 t2 = oprnd0 - t1;
2396 t3 = t2 >> 1;
2397 t4 = t1 + t3;
2398 q = t4 >> (post_shift - 1); */
2399 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2400 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2401 build_int_cst (itype, ml));
079c527f 2402 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f 2403
5deb57cb 2404 t2 = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2405 def_stmt
0d0e4a03 2406 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
083481d8 2407 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f
JJ
2408
2409 t3 = vect_recog_temp_ssa_var (itype, NULL);
2410 def_stmt
0d0e4a03 2411 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
079c527f
JJ
2412 append_pattern_def_seq (stmt_vinfo, def_stmt);
2413
5deb57cb 2414 t4 = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2415 def_stmt
0d0e4a03 2416 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
079c527f
JJ
2417
2418 if (post_shift != 1)
2419 {
2420 append_pattern_def_seq (stmt_vinfo, def_stmt);
2421
5deb57cb 2422 q = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2423 pattern_stmt
0d0e4a03
JJ
2424 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2425 build_int_cst (itype, post_shift - 1));
079c527f
JJ
2426 }
2427 else
2428 {
5deb57cb 2429 q = t4;
079c527f
JJ
2430 pattern_stmt = def_stmt;
2431 }
363477c0
JJ
2432 }
2433 else
2434 {
079c527f
JJ
2435 if (pre_shift >= prec || post_shift >= prec)
2436 return NULL;
2437
2438 /* t1 = oprnd0 >> pre_shift;
5deb57cb
JJ
2439 t2 = t1 h* ml;
2440 q = t2 >> post_shift; */
079c527f
JJ
2441 if (pre_shift)
2442 {
2443 t1 = vect_recog_temp_ssa_var (itype, NULL);
2444 def_stmt
0d0e4a03
JJ
2445 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2446 build_int_cst (NULL, pre_shift));
079c527f
JJ
2447 append_pattern_def_seq (stmt_vinfo, def_stmt);
2448 }
2449 else
2450 t1 = oprnd0;
363477c0 2451
5deb57cb 2452 t2 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2453 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2454 build_int_cst (itype, ml));
079c527f 2455
5deb57cb
JJ
2456 if (post_shift)
2457 {
2458 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f 2459
5deb57cb
JJ
2460 q = vect_recog_temp_ssa_var (itype, NULL);
2461 def_stmt
0d0e4a03
JJ
2462 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2463 build_int_cst (itype, post_shift));
5deb57cb
JJ
2464 }
2465 else
2466 q = t2;
2467
2468 pattern_stmt = def_stmt;
079c527f
JJ
2469 }
2470 }
2471 else
2472 {
2473 unsigned HOST_WIDE_INT ml;
4ee4c52c 2474 int post_shift;
6b58915b 2475 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
079c527f
JJ
2476 unsigned HOST_WIDE_INT abs_d;
2477 bool add = false;
5deb57cb 2478 tree t1, t2, t3, t4;
079c527f
JJ
2479
2480 /* Give up for -1. */
2481 if (d == -1)
2482 return NULL;
2483
079c527f
JJ
2484 /* Since d might be INT_MIN, we have to cast to
2485 unsigned HOST_WIDE_INT before negating to avoid
2486 undefined signed overflow. */
2487 abs_d = (d >= 0
2488 ? (unsigned HOST_WIDE_INT) d
2489 : - (unsigned HOST_WIDE_INT) d);
2490
2491 /* n rem d = n rem -d */
2492 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2493 {
2494 d = abs_d;
2495 oprnd1 = build_int_cst (itype, abs_d);
2496 }
2497 else if (HOST_BITS_PER_WIDE_INT >= prec
2498 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2499 /* This case is not handled correctly below. */
2500 return NULL;
2501
4ee4c52c 2502 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
079c527f
JJ
2503 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2504 {
2505 add = true;
2506 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2507 }
2508 if (post_shift >= prec)
2509 return NULL;
2510
7abed779 2511 /* t1 = oprnd0 h* ml; */
5deb57cb 2512 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2513 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2514 build_int_cst (itype, ml));
079c527f
JJ
2515
2516 if (add)
2517 {
5deb57cb 2518 /* t2 = t1 + oprnd0; */
7abed779 2519 append_pattern_def_seq (stmt_vinfo, def_stmt);
5deb57cb 2520 t2 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 2521 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
079c527f
JJ
2522 }
2523 else
5deb57cb 2524 t2 = t1;
079c527f 2525
5deb57cb 2526 if (post_shift)
079c527f 2527 {
5deb57cb 2528 /* t3 = t2 >> post_shift; */
7abed779 2529 append_pattern_def_seq (stmt_vinfo, def_stmt);
5deb57cb 2530 t3 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2531 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2532 build_int_cst (itype, post_shift));
363477c0 2533 }
079c527f 2534 else
5deb57cb 2535 t3 = t2;
079c527f 2536
807e902e 2537 wide_int oprnd0_min, oprnd0_max;
7abed779
JJ
2538 int msb = 1;
2539 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2540 {
807e902e 2541 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
7abed779 2542 msb = 0;
807e902e 2543 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
7abed779
JJ
2544 msb = -1;
2545 }
079c527f 2546
7abed779
JJ
2547 if (msb == 0 && d >= 0)
2548 {
2549 /* q = t3; */
2550 q = t3;
2551 pattern_stmt = def_stmt;
2552 }
2553 else
2554 {
2555 /* t4 = oprnd0 >> (prec - 1);
2556 or if we know from VRP that oprnd0 >= 0
2557 t4 = 0;
2558 or if we know from VRP that oprnd0 < 0
2559 t4 = -1; */
2560 append_pattern_def_seq (stmt_vinfo, def_stmt);
2561 t4 = vect_recog_temp_ssa_var (itype, NULL);
2562 if (msb != 1)
0d0e4a03
JJ
2563 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2564 build_int_cst (itype, msb));
7abed779 2565 else
0d0e4a03
JJ
2566 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2567 build_int_cst (itype, prec - 1));
7abed779
JJ
2568 append_pattern_def_seq (stmt_vinfo, def_stmt);
2569
2570 /* q = t3 - t4; or q = t4 - t3; */
2571 q = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2572 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2573 d < 0 ? t3 : t4);
7abed779 2574 }
079c527f
JJ
2575 }
2576
2577 if (rhs_code == TRUNC_MOD_EXPR)
2578 {
2579 tree r, t1;
2580
2581 /* We divided. Now finish by:
2582 t1 = q * oprnd1;
2583 r = oprnd0 - t1; */
2584 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2585
2586 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 2587 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
083481d8 2588 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0 2589
079c527f 2590 r = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 2591 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
363477c0
JJ
2592 }
2593
079c527f 2594 /* Pattern detected. */
73fbfcad 2595 if (dump_enabled_p ())
78c60e3d 2596 {
ccb3ad87 2597 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 2598 "vect_recog_divmod_pattern: detected: ");
ccb3ad87 2599 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 2600 dump_printf (MSG_NOTE, "\n");
78c60e3d 2601 }
363477c0 2602
9771b263 2603 stmts->safe_push (last_stmt);
363477c0
JJ
2604
2605 *type_in = vectype;
2606 *type_out = vectype;
2607 return pattern_stmt;
2608}
2609
69d2aade
JJ
2610/* Function vect_recog_mixed_size_cond_pattern
2611
2612 Try to find the following pattern:
2613
2614 type x_t, y_t;
2615 TYPE a_T, b_T, c_T;
2616 loop:
2617 S1 a_T = x_t CMP y_t ? b_T : c_T;
2618
2619 where type 'TYPE' is an integral type which has different size
bc4fb355 2620 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
69d2aade 2621 than 'type', the constants need to fit into an integer type
bc4fb355 2622 with the same width as 'type') or results of conversion from 'type'.
69d2aade
JJ
2623
2624 Input:
2625
2626 * LAST_STMT: A stmt from which the pattern search begins.
2627
2628 Output:
2629
2630 * TYPE_IN: The type of the input arguments to the pattern.
2631
2632 * TYPE_OUT: The type of the output of this pattern.
2633
2634 * Return value: A new stmt that will be used to replace the pattern.
2635 Additionally a def_stmt is added.
2636
2637 a_it = x_t CMP y_t ? b_it : c_it;
2638 a_T = (TYPE) a_it; */
2639
2640static gimple
9771b263 2641vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
69d2aade
JJ
2642 tree *type_out)
2643{
9771b263 2644 gimple last_stmt = (*stmts)[0];
69d2aade
JJ
2645 tree cond_expr, then_clause, else_clause;
2646 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
bc4fb355 2647 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
ef4bddc2 2648 machine_mode cmpmode;
69d2aade
JJ
2649 gimple pattern_stmt, def_stmt;
2650 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 2651 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
bc4fb355
IR
2652 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2653 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2654 bool promotion;
2655 tree comp_scalar_type;
69d2aade
JJ
2656
2657 if (!is_gimple_assign (last_stmt)
2658 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2659 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2660 return NULL;
2661
2662 cond_expr = gimple_assign_rhs1 (last_stmt);
2663 then_clause = gimple_assign_rhs2 (last_stmt);
2664 else_clause = gimple_assign_rhs3 (last_stmt);
2665
87aab9b2
JJ
2666 if (!COMPARISON_CLASS_P (cond_expr))
2667 return NULL;
2668
bc4fb355
IR
2669 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2670 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
87aab9b2 2671 if (comp_vectype == NULL_TREE)
69d2aade
JJ
2672 return NULL;
2673
2674 type = gimple_expr_type (last_stmt);
bc4fb355
IR
2675 if (types_compatible_p (type, comp_scalar_type)
2676 || ((TREE_CODE (then_clause) != INTEGER_CST
2677 || TREE_CODE (else_clause) != INTEGER_CST)
2678 && !INTEGRAL_TYPE_P (comp_scalar_type))
2679 || !INTEGRAL_TYPE_P (type))
2680 return NULL;
2681
2682 if ((TREE_CODE (then_clause) != INTEGER_CST
2683 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2684 &def_stmt0, &promotion))
2685 || (TREE_CODE (else_clause) != INTEGER_CST
2686 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2687 &def_stmt1, &promotion)))
2688 return NULL;
2689
2690 if (orig_type0 && orig_type1
2691 && !types_compatible_p (orig_type0, orig_type1))
2692 return NULL;
2693
2694 if (orig_type0)
2695 {
2696 if (!types_compatible_p (orig_type0, comp_scalar_type))
2697 return NULL;
2698 then_clause = gimple_assign_rhs1 (def_stmt0);
2699 itype = orig_type0;
2700 }
2701
2702 if (orig_type1)
2703 {
2704 if (!types_compatible_p (orig_type1, comp_scalar_type))
2705 return NULL;
2706 else_clause = gimple_assign_rhs1 (def_stmt1);
2707 itype = orig_type1;
2708 }
2709
69d2aade
JJ
2710 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2711
2712 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2713 return NULL;
2714
2715 vectype = get_vectype_for_scalar_type (type);
2716 if (vectype == NULL_TREE)
2717 return NULL;
2718
2719 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2720 return NULL;
2721
bc4fb355
IR
2722 if (itype == NULL_TREE)
2723 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2724 TYPE_UNSIGNED (type));
2725
69d2aade
JJ
2726 if (itype == NULL_TREE
2727 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2728 return NULL;
2729
2730 vecitype = get_vectype_for_scalar_type (itype);
2731 if (vecitype == NULL_TREE)
2732 return NULL;
2733
2734 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2735 return NULL;
2736
2737 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2738 {
bc4fb355
IR
2739 if ((TREE_CODE (then_clause) == INTEGER_CST
2740 && !int_fits_type_p (then_clause, itype))
2741 || (TREE_CODE (else_clause) == INTEGER_CST
2742 && !int_fits_type_p (else_clause, itype)))
69d2aade
JJ
2743 return NULL;
2744 }
2745
0d0e4a03
JJ
2746 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2747 COND_EXPR, unshare_expr (cond_expr),
2748 fold_convert (itype, then_clause),
2749 fold_convert (itype, else_clause));
2750 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2751 NOP_EXPR, gimple_assign_lhs (def_stmt));
69d2aade 2752
083481d8 2753 new_pattern_def_seq (stmt_vinfo, def_stmt);
f5709183 2754 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
69d2aade
JJ
2755 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2756 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2757 *type_in = vecitype;
2758 *type_out = vectype;
2759
73fbfcad 2760 if (dump_enabled_p ())
ccb3ad87 2761 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2762 "vect_recog_mixed_size_cond_pattern: detected:\n");
f5709183 2763
69d2aade
JJ
2764 return pattern_stmt;
2765}
2766
2767
71c92d17
JJ
2768/* Helper function of vect_recog_bool_pattern. Called recursively, return
2769 true if bool VAR can be optimized that way. */
2770
2771static bool
f5709183 2772check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
71c92d17
JJ
2773{
2774 gimple def_stmt;
2775 enum vect_def_type dt;
2776 tree def, rhs1;
2777 enum tree_code rhs_code;
2778
f5709183
IR
2779 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2780 &dt))
71c92d17
JJ
2781 return false;
2782
2783 if (dt != vect_internal_def)
2784 return false;
2785
2786 if (!is_gimple_assign (def_stmt))
2787 return false;
2788
2789 if (!has_single_use (def))
2790 return false;
2791
2792 rhs1 = gimple_assign_rhs1 (def_stmt);
2793 rhs_code = gimple_assign_rhs_code (def_stmt);
2794 switch (rhs_code)
2795 {
2796 case SSA_NAME:
f5709183 2797 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
71c92d17
JJ
2798
2799 CASE_CONVERT:
2800 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2801 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2802 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2803 return false;
f5709183 2804 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
71c92d17
JJ
2805
2806 case BIT_NOT_EXPR:
f5709183 2807 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
71c92d17
JJ
2808
2809 case BIT_AND_EXPR:
2810 case BIT_IOR_EXPR:
2811 case BIT_XOR_EXPR:
f5709183 2812 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
71c92d17 2813 return false;
f5709183
IR
2814 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2815 bb_vinfo);
71c92d17
JJ
2816
2817 default:
2818 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2819 {
2820 tree vecitype, comp_vectype;
2821
2f326699
JJ
2822 /* If the comparison can throw, then is_gimple_condexpr will be
2823 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2824 if (stmt_could_throw_p (def_stmt))
2825 return false;
2826
71c92d17
JJ
2827 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2828 if (comp_vectype == NULL_TREE)
2829 return false;
2830
2831 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2832 {
ef4bddc2 2833 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
71c92d17 2834 tree itype
ab0ef706 2835 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
71c92d17
JJ
2836 vecitype = get_vectype_for_scalar_type (itype);
2837 if (vecitype == NULL_TREE)
2838 return false;
2839 }
2840 else
2841 vecitype = comp_vectype;
2842 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2843 }
2844 return false;
2845 }
2846}
2847
2848
2849/* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2850 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
363477c0 2851 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
71c92d17
JJ
2852
2853static tree
2854adjust_bool_pattern_cast (tree type, tree var)
2855{
2856 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2857 gimple cast_stmt, pattern_stmt;
2858
363477c0 2859 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
71c92d17 2860 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
083481d8 2861 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
0d0e4a03
JJ
2862 cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2863 NOP_EXPR, gimple_assign_lhs (pattern_stmt));
71c92d17
JJ
2864 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2865 return gimple_assign_lhs (cast_stmt);
2866}
2867
2868
2869/* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2870 recursively. VAR is an SSA_NAME that should be transformed from bool
2871 to a wider integer type, OUT_TYPE is the desired final integer type of
2872 the whole pattern, TRUEVAL should be NULL unless optimizing
2873 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2874 in the then_clause, STMTS is where statements with added pattern stmts
2875 should be pushed to. */
2876
2877static tree
2878adjust_bool_pattern (tree var, tree out_type, tree trueval,
9771b263 2879 vec<gimple> *stmts)
71c92d17
JJ
2880{
2881 gimple stmt = SSA_NAME_DEF_STMT (var);
2882 enum tree_code rhs_code, def_rhs_code;
2883 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2884 location_t loc;
2885 gimple pattern_stmt, def_stmt;
2886
2887 rhs1 = gimple_assign_rhs1 (stmt);
2888 rhs2 = gimple_assign_rhs2 (stmt);
2889 rhs_code = gimple_assign_rhs_code (stmt);
2890 loc = gimple_location (stmt);
2891 switch (rhs_code)
2892 {
2893 case SSA_NAME:
2894 CASE_CONVERT:
2895 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2896 itype = TREE_TYPE (irhs1);
2897 pattern_stmt
0d0e4a03
JJ
2898 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2899 SSA_NAME, irhs1);
71c92d17
JJ
2900 break;
2901
2902 case BIT_NOT_EXPR:
2903 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2904 itype = TREE_TYPE (irhs1);
2905 pattern_stmt
0d0e4a03
JJ
2906 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2907 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
71c92d17
JJ
2908 break;
2909
2910 case BIT_AND_EXPR:
2911 /* Try to optimize x = y & (a < b ? 1 : 0); into
2912 x = (a < b ? y : 0);
2913
2914 E.g. for:
2915 bool a_b, b_b, c_b;
2916 TYPE d_T;
2917
2918 S1 a_b = x1 CMP1 y1;
2919 S2 b_b = x2 CMP2 y2;
2920 S3 c_b = a_b & b_b;
2921 S4 d_T = (TYPE) c_b;
2922
2923 we would normally emit:
2924
2925 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2926 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2927 S3' c_T = a_T & b_T;
2928 S4' d_T = c_T;
2929
2930 but we can save one stmt by using the
2931 result of one of the COND_EXPRs in the other COND_EXPR and leave
2932 BIT_AND_EXPR stmt out:
2933
2934 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2935 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2936 S4' f_T = c_T;
2937
2938 At least when VEC_COND_EXPR is implemented using masks
2939 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2940 computes the comparison masks and ands it, in one case with
2941 all ones vector, in the other case with a vector register.
2942 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2943 often more expensive. */
2944 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2945 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2946 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2947 {
2948 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2949 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2950 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2951 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2952 {
2953 gimple tstmt;
2954 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2955 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
9771b263 2956 tstmt = stmts->pop ();
71c92d17 2957 gcc_assert (tstmt == def_stmt);
9771b263 2958 stmts->quick_push (stmt);
71c92d17
JJ
2959 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2960 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
363477c0 2961 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
71c92d17
JJ
2962 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2963 return irhs2;
2964 }
2965 else
2966 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2967 goto and_ior_xor;
2968 }
2969 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2970 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2971 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2972 {
2973 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2974 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2975 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2976 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2977 {
2978 gimple tstmt;
2979 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2980 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
9771b263 2981 tstmt = stmts->pop ();
71c92d17 2982 gcc_assert (tstmt == def_stmt);
9771b263 2983 stmts->quick_push (stmt);
71c92d17
JJ
2984 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2985 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
363477c0 2986 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
71c92d17
JJ
2987 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2988 return irhs1;
2989 }
2990 else
2991 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2992 goto and_ior_xor;
2993 }
2994 /* FALLTHRU */
2995 case BIT_IOR_EXPR:
2996 case BIT_XOR_EXPR:
2997 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2998 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2999 and_ior_xor:
3000 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3001 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3002 {
3003 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3004 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3005 int out_prec = TYPE_PRECISION (out_type);
3006 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3007 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3008 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3009 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3010 else
3011 {
3012 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3013 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3014 }
3015 }
3016 itype = TREE_TYPE (irhs1);
3017 pattern_stmt
0d0e4a03
JJ
3018 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3019 rhs_code, irhs1, irhs2);
71c92d17
JJ
3020 break;
3021
3022 default:
3023 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3024 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
e6a21dd2
JJ
3025 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3026 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3027 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
71c92d17 3028 {
ef4bddc2 3029 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
71c92d17 3030 itype
ab0ef706 3031 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
71c92d17
JJ
3032 }
3033 else
3034 itype = TREE_TYPE (rhs1);
3035 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3036 if (trueval == NULL_TREE)
3037 trueval = build_int_cst (itype, 1);
3038 else
3039 gcc_checking_assert (useless_type_conversion_p (itype,
3040 TREE_TYPE (trueval)));
3041 pattern_stmt
0d0e4a03
JJ
3042 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3043 COND_EXPR, cond_expr, trueval,
3044 build_int_cst (itype, 0));
71c92d17
JJ
3045 break;
3046 }
3047
9771b263 3048 stmts->safe_push (stmt);
71c92d17
JJ
3049 gimple_set_location (pattern_stmt, loc);
3050 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3051 return gimple_assign_lhs (pattern_stmt);
3052}
3053
3054
3055/* Function vect_recog_bool_pattern
3056
3057 Try to find pattern like following:
3058
3059 bool a_b, b_b, c_b, d_b, e_b;
3060 TYPE f_T;
3061 loop:
3062 S1 a_b = x1 CMP1 y1;
3063 S2 b_b = x2 CMP2 y2;
3064 S3 c_b = a_b & b_b;
3065 S4 d_b = x3 CMP3 y3;
3066 S5 e_b = c_b | d_b;
3067 S6 f_T = (TYPE) e_b;
3068
52264dbf
RB
3069 where type 'TYPE' is an integral type. Or a similar pattern
3070 ending in
3071
3072 S6 f_Y = e_b ? r_Y : s_Y;
3073
3074 as results from if-conversion of a complex condition.
71c92d17
JJ
3075
3076 Input:
3077
3078 * LAST_STMT: A stmt at the end from which the pattern
3079 search begins, i.e. cast of a bool to
3080 an integer type.
3081
3082 Output:
3083
3084 * TYPE_IN: The type of the input arguments to the pattern.
3085
3086 * TYPE_OUT: The type of the output of this pattern.
3087
3088 * Return value: A new stmt that will be used to replace the pattern.
3089
3090 Assuming size of TYPE is the same as size of all comparisons
3091 (otherwise some casts would be added where needed), the above
3092 sequence we create related pattern stmts:
3093 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3094 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3095 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3096 S5' e_T = c_T | d_T;
3097 S6' f_T = e_T;
3098
3099 Instead of the above S3' we could emit:
3100 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3101 S3' c_T = a_T | b_T;
3102 but the above is more efficient. */
3103
3104static gimple
9771b263 3105vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
71c92d17
JJ
3106 tree *type_out)
3107{
9771b263 3108 gimple last_stmt = stmts->pop ();
71c92d17
JJ
3109 enum tree_code rhs_code;
3110 tree var, lhs, rhs, vectype;
3111 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3112 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 3113 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
71c92d17
JJ
3114 gimple pattern_stmt;
3115
3116 if (!is_gimple_assign (last_stmt))
3117 return NULL;
3118
3119 var = gimple_assign_rhs1 (last_stmt);
3120 lhs = gimple_assign_lhs (last_stmt);
3121
3122 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3123 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3124 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3125 return NULL;
3126
3127 rhs_code = gimple_assign_rhs_code (last_stmt);
3128 if (CONVERT_EXPR_CODE_P (rhs_code))
3129 {
78048b1c
JJ
3130 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3131 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
71c92d17
JJ
3132 return NULL;
3133 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3134 if (vectype == NULL_TREE)
3135 return NULL;
3136
f5709183 3137 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
71c92d17
JJ
3138 return NULL;
3139
3140 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3141 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3142 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
0d0e4a03 3143 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
71c92d17
JJ
3144 else
3145 pattern_stmt
0d0e4a03 3146 = gimple_build_assign (lhs, NOP_EXPR, rhs);
71c92d17
JJ
3147 *type_out = vectype;
3148 *type_in = vectype;
9771b263 3149 stmts->safe_push (last_stmt);
73fbfcad 3150 if (dump_enabled_p ())
ccb3ad87 3151 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3152 "vect_recog_bool_pattern: detected:\n");
f5709183 3153
52264dbf
RB
3154 return pattern_stmt;
3155 }
3156 else if (rhs_code == COND_EXPR
3157 && TREE_CODE (var) == SSA_NAME)
3158 {
3159 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3160 if (vectype == NULL_TREE)
3161 return NULL;
3162
3163 /* Build a scalar type for the boolean result that when
3164 vectorized matches the vector type of the result in
3165 size and number of elements. */
3166 unsigned prec
3167 = wi::udiv_trunc (TYPE_SIZE (vectype),
3168 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3169 tree type
3170 = build_nonstandard_integer_type (prec,
3171 TYPE_UNSIGNED (TREE_TYPE (var)));
3172 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3173 return NULL;
3174
3175 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3176 return NULL;
3177
3178 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
3179 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3180 pattern_stmt
0d0e4a03
JJ
3181 = gimple_build_assign (lhs, COND_EXPR,
3182 build2 (NE_EXPR, boolean_type_node,
3183 rhs, build_int_cst (type, 0)),
3184 gimple_assign_rhs2 (last_stmt),
3185 gimple_assign_rhs3 (last_stmt));
52264dbf
RB
3186 *type_out = vectype;
3187 *type_in = vectype;
3188 stmts->safe_push (last_stmt);
3189 if (dump_enabled_p ())
3190 dump_printf_loc (MSG_NOTE, vect_location,
3191 "vect_recog_bool_pattern: detected:\n");
3192
71c92d17
JJ
3193 return pattern_stmt;
3194 }
ab0ef706
JJ
3195 else if (rhs_code == SSA_NAME
3196 && STMT_VINFO_DATA_REF (stmt_vinfo))
3197 {
3198 stmt_vec_info pattern_stmt_info;
3199 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3200 gcc_assert (vectype != NULL_TREE);
78336739
JJ
3201 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3202 return NULL;
f5709183 3203 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
ab0ef706
JJ
3204 return NULL;
3205
3206 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
3207 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3208 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3209 {
3210 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
0d0e4a03 3211 gimple cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
083481d8 3212 new_pattern_def_seq (stmt_vinfo, cast_stmt);
ab0ef706
JJ
3213 rhs = rhs2;
3214 }
0d0e4a03 3215 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
f5709183
IR
3216 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3217 bb_vinfo);
ab0ef706
JJ
3218 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3219 STMT_VINFO_DATA_REF (pattern_stmt_info)
3220 = STMT_VINFO_DATA_REF (stmt_vinfo);
3221 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3222 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3223 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3224 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3225 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3226 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3227 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3228 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
78048b1c 3229 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
ab0ef706
JJ
3230 *type_out = vectype;
3231 *type_in = vectype;
9771b263 3232 stmts->safe_push (last_stmt);
73fbfcad 3233 if (dump_enabled_p ())
ccb3ad87 3234 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3235 "vect_recog_bool_pattern: detected:\n");
ab0ef706
JJ
3236 return pattern_stmt;
3237 }
71c92d17
JJ
3238 else
3239 return NULL;
3240}
3241
3242
1107f3ae
IR
3243/* Mark statements that are involved in a pattern. */
3244
3245static inline void
3246vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
3247 tree pattern_vectype)
3248{
3249 stmt_vec_info pattern_stmt_info, def_stmt_info;
3250 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
3251 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
f5709183 3252 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
1107f3ae
IR
3253 gimple def_stmt;
3254
1107f3ae 3255 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
ab0ef706
JJ
3256 if (pattern_stmt_info == NULL)
3257 {
f5709183
IR
3258 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3259 bb_vinfo);
ab0ef706
JJ
3260 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3261 }
3262 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
1107f3ae
IR
3263
3264 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3265 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
ab0ef706 3266 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
1107f3ae
IR
3267 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3268 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3269 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
363477c0
JJ
3270 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3271 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3272 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
1107f3ae 3273 {
363477c0
JJ
3274 gimple_stmt_iterator si;
3275 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3276 !gsi_end_p (si); gsi_next (&si))
69d2aade 3277 {
363477c0
JJ
3278 def_stmt = gsi_stmt (si);
3279 def_stmt_info = vinfo_for_stmt (def_stmt);
3280 if (def_stmt_info == NULL)
3281 {
f5709183
IR
3282 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
3283 bb_vinfo);
363477c0
JJ
3284 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3285 }
3286 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3287 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
56f8faae 3288 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
363477c0
JJ
3289 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3290 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
69d2aade 3291 }
1107f3ae
IR
3292 }
3293}
3294
b8698a0f 3295/* Function vect_pattern_recog_1
20f06221
DN
3296
3297 Input:
3298 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3299 computation pattern.
3300 STMT: A stmt from which the pattern search should start.
3301
3302 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
b8698a0f
L
3303 expression that computes the same functionality and can be used to
3304 replace the sequence of stmts that are involved in the pattern.
20f06221
DN
3305
3306 Output:
b8698a0f
L
3307 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3308 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3309 relevant vector type. If 'TYPE_IN' is already a vector type, then this
20f06221
DN
3310 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3311 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3312 to the available target pattern.
3313
b8698a0f 3314 This function also does some bookkeeping, as explained in the documentation
20f06221
DN
3315 for vect_recog_pattern. */
3316
3317static void
92aea285
JJ
3318vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3319 gimple_stmt_iterator si,
9771b263 3320 vec<gimple> *stmts_to_replace)
20f06221 3321{
726a989a 3322 gimple stmt = gsi_stmt (si), pattern_stmt;
383d9c83 3323 stmt_vec_info stmt_info;
383d9c83 3324 loop_vec_info loop_vinfo;
20f06221
DN
3325 tree pattern_vectype;
3326 tree type_in, type_out;
20f06221 3327 enum tree_code code;
b5aeb3bb
IR
3328 int i;
3329 gimple next;
20f06221 3330
9771b263
DN
3331 stmts_to_replace->truncate (0);
3332 stmts_to_replace->quick_push (stmt);
d1fc143d 3333 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
726a989a 3334 if (!pattern_stmt)
b8698a0f
L
3335 return;
3336
9771b263 3337 stmt = stmts_to_replace->last ();
383d9c83
IR
3338 stmt_info = vinfo_for_stmt (stmt);
3339 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3340
b8698a0f
L
3341 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3342 {
3343 /* No need to check target support (already checked by the pattern
3344 recognition function). */
b690cc0f 3345 pattern_vectype = type_out ? type_out : type_in;
20f06221
DN
3346 }
3347 else
3348 {
ef4bddc2 3349 machine_mode vec_mode;
20f06221
DN
3350 enum insn_code icode;
3351 optab optab;
3352
3353 /* Check target support */
b690cc0f
RG
3354 type_in = get_vectype_for_scalar_type (type_in);
3355 if (!type_in)
3356 return;
3357 if (type_out)
3358 type_out = get_vectype_for_scalar_type (type_out);
3359 else
3360 type_out = type_in;
15bbc165
AO
3361 if (!type_out)
3362 return;
b690cc0f 3363 pattern_vectype = type_out;
03d3e953 3364
726a989a
RB
3365 if (is_gimple_assign (pattern_stmt))
3366 code = gimple_assign_rhs_code (pattern_stmt);
3367 else
3368 {
3369 gcc_assert (is_gimple_call (pattern_stmt));
3370 code = CALL_EXPR;
3371 }
3372
b690cc0f
RG
3373 optab = optab_for_tree_code (code, type_in, optab_default);
3374 vec_mode = TYPE_MODE (type_in);
20f06221 3375 if (!optab
947131ba 3376 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
b690cc0f 3377 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
20f06221
DN
3378 return;
3379 }
3380
3381 /* Found a vectorizable pattern. */
73fbfcad 3382 if (dump_enabled_p ())
20f06221 3383 {
ccb3ad87 3384 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 3385 "pattern recognized: ");
ccb3ad87 3386 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
20f06221 3387 }
b8698a0f 3388
726a989a 3389 /* Mark the stmts that are involved in the pattern. */
1107f3ae 3390 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
20f06221 3391
b5aeb3bb
IR
3392 /* Patterns cannot be vectorized using SLP, because they change the order of
3393 computation. */
f5709183 3394 if (loop_vinfo)
9771b263 3395 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
f5709183 3396 if (next == stmt)
9771b263 3397 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
51312233 3398
1107f3ae
IR
3399 /* It is possible that additional pattern stmts are created and inserted in
3400 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3401 relevant statements. */
9771b263
DN
3402 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3403 && (unsigned) i < (stmts_to_replace->length () - 1);
51312233
IR
3404 i++)
3405 {
3406 stmt_info = vinfo_for_stmt (stmt);
3407 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
73fbfcad 3408 if (dump_enabled_p ())
51312233 3409 {
ccb3ad87 3410 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 3411 "additional pattern stmt: ");
ccb3ad87 3412 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
51312233
IR
3413 }
3414
1107f3ae 3415 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
51312233 3416 }
20f06221
DN
3417}
3418
3419
3420/* Function vect_pattern_recog
3421
3422 Input:
3423 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3424 computation idioms.
3425
9d5e7640
IR
3426 Output - for each computation idiom that is detected we create a new stmt
3427 that provides the same functionality and that can be vectorized. We
20f06221
DN
3428 also record some information in the struct_stmt_info of the relevant
3429 stmts, as explained below:
3430
3431 At the entry to this function we have the following stmts, with the
3432 following initial value in the STMT_VINFO fields:
3433
3434 stmt in_pattern_p related_stmt vec_stmt
3435 S1: a_i = .... - - -
3436 S2: a_2 = ..use(a_i).. - - -
3437 S3: a_1 = ..use(a_2).. - - -
3438 S4: a_0 = ..use(a_1).. - - -
3439 S5: ... = ..use(a_0).. - - -
3440
3441 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
9d5e7640
IR
3442 represented by a single stmt. We then:
3443 - create a new stmt S6 equivalent to the pattern (the stmt is not
3444 inserted into the code)
20f06221
DN
3445 - fill in the STMT_VINFO fields as follows:
3446
3447 in_pattern_p related_stmt vec_stmt
b8698a0f 3448 S1: a_i = .... - - -
20f06221
DN
3449 S2: a_2 = ..use(a_i).. - - -
3450 S3: a_1 = ..use(a_2).. - - -
20f06221 3451 S4: a_0 = ..use(a_1).. true S6 -
9d5e7640 3452 '---> S6: a_new = .... - S4 -
20f06221
DN
3453 S5: ... = ..use(a_0).. - - -
3454
3455 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
9d5e7640 3456 to each other through the RELATED_STMT field).
20f06221
DN
3457
3458 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3459 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3460 remain irrelevant unless used by stmts other than S4.
3461
3462 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
9d5e7640 3463 (because they are marked as irrelevant). It will vectorize S6, and record
83197f37
IR
3464 a pointer to the new vector stmt VS6 from S6 (as usual).
3465 S4 will be skipped, and S5 will be vectorized as usual:
20f06221
DN
3466
3467 in_pattern_p related_stmt vec_stmt
3468 S1: a_i = .... - - -
3469 S2: a_2 = ..use(a_i).. - - -
3470 S3: a_1 = ..use(a_2).. - - -
3471 > VS6: va_new = .... - - -
20f06221 3472 S4: a_0 = ..use(a_1).. true S6 VS6
9d5e7640 3473 '---> S6: a_new = .... - S4 VS6
20f06221
DN
3474 > VS5: ... = ..vuse(va_new).. - - -
3475 S5: ... = ..use(a_0).. - - -
3476
9d5e7640 3477 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
20f06221
DN
3478 elsewhere), and we'll end up with:
3479
b8698a0f 3480 VS6: va_new = ....
83197f37
IR
3481 VS5: ... = ..vuse(va_new)..
3482
3483 In case of more than one pattern statements, e.g., widen-mult with
3484 intermediate type:
3485
3486 S1 a_t = ;
3487 S2 a_T = (TYPE) a_t;
3488 '--> S3: a_it = (interm_type) a_t;
3489 S4 prod_T = a_T * CONST;
3490 '--> S5: prod_T' = a_it w* CONST;
3491
3492 there may be other users of a_T outside the pattern. In that case S2 will
3493 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3494 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3495 be recorded in S3. */
20f06221
DN
3496
3497void
f5709183 3498vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
20f06221 3499{
f5709183 3500 struct loop *loop;
772e61e1 3501 basic_block *bbs;
f5709183 3502 unsigned int nbbs;
726a989a 3503 gimple_stmt_iterator si;
20f06221 3504 unsigned int i, j;
92aea285 3505 vect_recog_func_ptr vect_recog_func;
00f96dc9 3506 auto_vec<gimple, 1> stmts_to_replace;
f5709183 3507 gimple stmt;
20f06221 3508
73fbfcad 3509 if (dump_enabled_p ())
78c60e3d 3510 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3511 "=== vect_pattern_recog ===\n");
20f06221 3512
f5709183
IR
3513 if (loop_vinfo)
3514 {
3515 loop = LOOP_VINFO_LOOP (loop_vinfo);
3516 bbs = LOOP_VINFO_BBS (loop_vinfo);
3517 nbbs = loop->num_nodes;
3518 }
3519 else
3520 {
772e61e1 3521 bbs = &BB_VINFO_BB (bb_vinfo);
f5709183 3522 nbbs = 1;
f5709183
IR
3523 }
3524
20f06221
DN
3525 /* Scan through the loop stmts, applying the pattern recognition
3526 functions starting at each stmt visited: */
3527 for (i = 0; i < nbbs; i++)
3528 {
3529 basic_block bb = bbs[i];
726a989a 3530 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
20f06221 3531 {
f5709183
IR
3532 if (bb_vinfo && (stmt = gsi_stmt (si))
3533 && vinfo_for_stmt (stmt)
3534 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3535 continue;
3536
20f06221
DN
3537 /* Scan over all generic vect_recog_xxx_pattern functions. */
3538 for (j = 0; j < NUM_PATTERNS; j++)
3539 {
92aea285
JJ
3540 vect_recog_func = vect_vect_recog_func_ptrs[j];
3541 vect_pattern_recog_1 (vect_recog_func, si,
d1fc143d 3542 &stmts_to_replace);
20f06221
DN
3543 }
3544 }
3545 }
3546}