]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-patterns.c
Remove configure check for unused fdopen.
[thirdparty/gcc.git] / gcc / tree-vect-patterns.c
CommitLineData
20f06221 1/* Analysis Utilities for Loop Vectorization.
4fb489e7
JJ
2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
20f06221
DN
4 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
9dcd6f09 10Software Foundation; either version 3, or (at your option) any later
20f06221
DN
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
9dcd6f09
NC
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
20f06221
DN
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "tm.h"
26#include "ggc.h"
27#include "tree.h"
20f06221
DN
28#include "target.h"
29#include "basic-block.h"
cf835838 30#include "gimple-pretty-print.h"
20f06221
DN
31#include "tree-flow.h"
32#include "tree-dump.h"
20f06221
DN
33#include "cfgloop.h"
34#include "expr.h"
35#include "optabs.h"
36#include "params.h"
37#include "tree-data-ref.h"
38#include "tree-vectorizer.h"
39#include "recog.h"
718f9c0f 40#include "diagnostic-core.h"
20f06221 41
20f06221 42/* Pattern recognition functions */
51312233
IR
43static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
44 tree *);
45static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
46 tree *);
47static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
48 tree *);
49static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
1107f3ae
IR
50static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
51 tree *);
36ba4aae
IR
52static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
53 tree *, tree *);
732a0ad3
JJ
54static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
55 tree *, tree *);
363477c0
JJ
56static gimple vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **,
57 tree *, tree *);
69d2aade
JJ
58static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
59 tree *, tree *);
71c92d17 60static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);
20f06221
DN
61static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
62 vect_recog_widen_mult_pattern,
63 vect_recog_widen_sum_pattern,
0b2229b0 64 vect_recog_dot_prod_pattern,
1107f3ae 65 vect_recog_pow_pattern,
69d2aade 66 vect_recog_over_widening_pattern,
36ba4aae 67 vect_recog_widen_shift_pattern,
732a0ad3 68 vect_recog_vector_vector_shift_pattern,
363477c0 69 vect_recog_sdivmod_pow2_pattern,
71c92d17
JJ
70 vect_recog_mixed_size_cond_pattern,
71 vect_recog_bool_pattern};
20f06221 72
083481d8
JJ
73static inline void
74append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
75{
a1a6c5b2
JJ
76 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
77 stmt);
083481d8
JJ
78}
79
80static inline void
81new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
82{
83 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
84 append_pattern_def_seq (stmt_info, stmt);
85}
86
20f06221
DN
87/* Function widened_name_p
88
89 Check whether NAME, an ssa-name used in USE_STMT,
90 is a result of a type-promotion, such that:
91 DEF_STMT: NAME = NOP (name0)
b8698a0f 92 where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
383d9c83
IR
93 If CHECK_SIGN is TRUE, check that either both types are signed or both are
94 unsigned. */
20f06221
DN
95
96static bool
383d9c83
IR
97widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
98 bool check_sign)
20f06221
DN
99{
100 tree dummy;
726a989a 101 gimple dummy_gimple;
20f06221
DN
102 loop_vec_info loop_vinfo;
103 stmt_vec_info stmt_vinfo;
20f06221
DN
104 tree type = TREE_TYPE (name);
105 tree oprnd0;
106 enum vect_def_type dt;
107 tree def;
108
109 stmt_vinfo = vinfo_for_stmt (use_stmt);
110 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
111
a70d6342 112 if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt))
20f06221
DN
113 return false;
114
8644a673
IR
115 if (dt != vect_internal_def
116 && dt != vect_external_def && dt != vect_constant_def)
20f06221
DN
117 return false;
118
119 if (! *def_stmt)
120 return false;
121
726a989a 122 if (!is_gimple_assign (*def_stmt))
20f06221
DN
123 return false;
124
726a989a 125 if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
20f06221
DN
126 return false;
127
726a989a 128 oprnd0 = gimple_assign_rhs1 (*def_stmt);
20f06221
DN
129
130 *half_type = TREE_TYPE (oprnd0);
131 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
383d9c83 132 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
20f06221
DN
133 || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
134 return false;
135
b8698a0f 136 if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy,
a70d6342 137 &dt))
20f06221
DN
138 return false;
139
20f06221
DN
140 return true;
141}
142
726a989a
RB
143/* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
144 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
145
146static tree
147vect_recog_temp_ssa_var (tree type, gimple stmt)
148{
149 tree var = create_tmp_var (type, "patt");
150
151 add_referenced_var (var);
152 var = make_ssa_name (var, stmt);
153 return var;
154}
20f06221
DN
155
156/* Function vect_recog_dot_prod_pattern
157
158 Try to find the following pattern:
159
160 type x_t, y_t;
161 TYPE1 prod;
162 TYPE2 sum = init;
163 loop:
164 sum_0 = phi <init, sum_1>
165 S1 x_t = ...
166 S2 y_t = ...
167 S3 x_T = (TYPE1) x_t;
168 S4 y_T = (TYPE1) y_t;
169 S5 prod = x_T * y_T;
170 [S6 prod = (TYPE2) prod; #optional]
171 S7 sum_1 = prod + sum_0;
172
b8698a0f
L
173 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
174 same size of 'TYPE1' or bigger. This is a special case of a reduction
20f06221 175 computation.
b8698a0f 176
20f06221
DN
177 Input:
178
51312233
IR
179 * STMTS: Contains a stmt from which the pattern search begins. In the
180 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
181 will be detected.
20f06221
DN
182
183 Output:
184
185 * TYPE_IN: The type of the input arguments to the pattern.
186
187 * TYPE_OUT: The type of the output of this pattern.
188
189 * Return value: A new stmt that will be used to replace the sequence of
190 stmts that constitute the pattern. In this case it will be:
191 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
d29de1bf
DN
192
193 Note: The dot-prod idiom is a widening reduction pattern that is
194 vectorized without preserving all the intermediate results. It
195 produces only N/2 (widened) results (by summing up pairs of
196 intermediate results) rather than all N results. Therefore, we
197 cannot allow this pattern when we want to get all the results and in
198 the correct order (as is the case when this computation is in an
199 inner-loop nested in an outer-loop that us being vectorized). */
20f06221 200
726a989a 201static gimple
51312233
IR
202vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
203 tree *type_out)
20f06221 204{
51312233 205 gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
20f06221
DN
206 tree oprnd0, oprnd1;
207 tree oprnd00, oprnd01;
51312233 208 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
20f06221 209 tree type, half_type;
726a989a 210 gimple pattern_stmt;
20f06221 211 tree prod_type;
d29de1bf
DN
212 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
213 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
f471fe72 214 tree var;
20f06221 215
51312233 216 if (!is_gimple_assign (last_stmt))
20f06221
DN
217 return NULL;
218
51312233 219 type = gimple_expr_type (last_stmt);
20f06221 220
b8698a0f 221 /* Look for the following pattern
20f06221
DN
222 DX = (TYPE1) X;
223 DY = (TYPE1) Y;
b8698a0f 224 DPROD = DX * DY;
20f06221
DN
225 DDPROD = (TYPE2) DPROD;
226 sum_1 = DDPROD + sum_0;
b8698a0f 227 In which
20f06221
DN
228 - DX is double the size of X
229 - DY is double the size of Y
230 - DX, DY, DPROD all have the same type
231 - sum is the same size of DPROD or bigger
232 - sum has been recognized as a reduction variable.
233
234 This is equivalent to:
235 DPROD = X w* Y; #widen mult
236 sum_1 = DPROD w+ sum_0; #widen summation
237 or
238 DPROD = X w* Y; #widen mult
239 sum_1 = DPROD + sum_0; #summation
240 */
241
242 /* Starting from LAST_STMT, follow the defs of its uses in search
243 of the above pattern. */
244
51312233 245 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
20f06221
DN
246 return NULL;
247
248 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
249 {
250 /* Has been detected as widening-summation? */
251
252 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
726a989a
RB
253 type = gimple_expr_type (stmt);
254 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
20f06221 255 return NULL;
726a989a
RB
256 oprnd0 = gimple_assign_rhs1 (stmt);
257 oprnd1 = gimple_assign_rhs2 (stmt);
20f06221
DN
258 half_type = TREE_TYPE (oprnd0);
259 }
260 else
261 {
726a989a 262 gimple def_stmt;
20f06221
DN
263
264 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
265 return NULL;
51312233
IR
266 oprnd0 = gimple_assign_rhs1 (last_stmt);
267 oprnd1 = gimple_assign_rhs2 (last_stmt);
9600efe1
MM
268 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
269 || !types_compatible_p (TREE_TYPE (oprnd1), type))
20f06221 270 return NULL;
51312233 271 stmt = last_stmt;
20f06221 272
383d9c83 273 if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
20f06221
DN
274 {
275 stmt = def_stmt;
726a989a 276 oprnd0 = gimple_assign_rhs1 (stmt);
20f06221
DN
277 }
278 else
279 half_type = type;
280 }
281
51312233 282 /* So far so good. Since last_stmt was detected as a (summation) reduction,
20f06221
DN
283 we know that oprnd1 is the reduction variable (defined by a loop-header
284 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
285 Left to check that oprnd0 is defined by a (widen_)mult_expr */
ba02d3bc
RG
286 if (TREE_CODE (oprnd0) != SSA_NAME)
287 return NULL;
20f06221
DN
288
289 prod_type = half_type;
290 stmt = SSA_NAME_DEF_STMT (oprnd0);
3cb35c12
CF
291
292 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
75264e61 293 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
3cb35c12
CF
294 return NULL;
295
b8698a0f 296 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
8665227f 297 inside the loop (in case we are analyzing an outer-loop). */
726a989a 298 if (!is_gimple_assign (stmt))
b8698a0f 299 return NULL;
20f06221
DN
300 stmt_vinfo = vinfo_for_stmt (stmt);
301 gcc_assert (stmt_vinfo);
8644a673 302 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
b3130586 303 return NULL;
726a989a 304 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
20f06221
DN
305 return NULL;
306 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
307 {
308 /* Has been detected as a widening multiplication? */
309
310 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
726a989a 311 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
20f06221
DN
312 return NULL;
313 stmt_vinfo = vinfo_for_stmt (stmt);
314 gcc_assert (stmt_vinfo);
8644a673 315 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
726a989a
RB
316 oprnd00 = gimple_assign_rhs1 (stmt);
317 oprnd01 = gimple_assign_rhs2 (stmt);
20f06221
DN
318 }
319 else
320 {
321 tree half_type0, half_type1;
726a989a 322 gimple def_stmt;
20f06221
DN
323 tree oprnd0, oprnd1;
324
726a989a
RB
325 oprnd0 = gimple_assign_rhs1 (stmt);
326 oprnd1 = gimple_assign_rhs2 (stmt);
9600efe1
MM
327 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
328 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
20f06221 329 return NULL;
383d9c83 330 if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
20f06221 331 return NULL;
726a989a 332 oprnd00 = gimple_assign_rhs1 (def_stmt);
383d9c83 333 if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
20f06221 334 return NULL;
726a989a 335 oprnd01 = gimple_assign_rhs1 (def_stmt);
9600efe1 336 if (!types_compatible_p (half_type0, half_type1))
20f06221
DN
337 return NULL;
338 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
339 return NULL;
340 }
341
342 half_type = TREE_TYPE (oprnd00);
343 *type_in = half_type;
344 *type_out = type;
b8698a0f 345
20f06221 346 /* Pattern detected. Create a stmt to be used to replace the pattern: */
726a989a 347 var = vect_recog_temp_ssa_var (type, NULL);
f471fe72
RG
348 pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
349 oprnd00, oprnd01, oprnd1);
b8698a0f 350
20f06221
DN
351 if (vect_print_dump_info (REPORT_DETAILS))
352 {
353 fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
726a989a 354 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
20f06221 355 }
d29de1bf
DN
356
357 /* We don't allow changing the order of the computation in the inner-loop
358 when doing outer-loop vectorization. */
51312233 359 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
d29de1bf 360
726a989a 361 return pattern_stmt;
20f06221 362}
b8698a0f 363
51312233 364
36ba4aae
IR
365/* Handle widening operation by a constant. At the moment we support MULT_EXPR
366 and LSHIFT_EXPR.
367
368 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
369 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
51312233
IR
370
371 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
36ba4aae
IR
372 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
373 that satisfies the above restrictions, we can perform a widening opeartion
374 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
375 with a_it = (interm_type) a_t; */
51312233
IR
376
377static bool
36ba4aae
IR
378vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
379 tree const_oprnd, tree *oprnd,
380 VEC (gimple, heap) **stmts, tree type,
381 tree *half_type, gimple def_stmt)
51312233
IR
382{
383 tree new_type, new_oprnd, tmp;
384 gimple new_stmt;
ad949bcc
JJ
385 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
386 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
51312233 387
36ba4aae
IR
388 if (code != MULT_EXPR && code != LSHIFT_EXPR)
389 return false;
390
391 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
392 || (code == LSHIFT_EXPR
393 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
394 != 1))
395 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
51312233
IR
396 {
397 /* CONST_OPRND is a constant of HALF_TYPE. */
398 *oprnd = gimple_assign_rhs1 (def_stmt);
399 return true;
400 }
401
402 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
ad949bcc
JJ
403 || !gimple_bb (def_stmt)
404 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
51312233
IR
405 || !vinfo_for_stmt (def_stmt))
406 return false;
407
36ba4aae 408 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
51312233
IR
409 a type 2 times bigger than HALF_TYPE. */
410 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
411 TYPE_UNSIGNED (type));
36ba4aae
IR
412 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
413 || (code == LSHIFT_EXPR
414 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
51312233
IR
415 return false;
416
36ba4aae 417 /* Use NEW_TYPE for widening operation. */
51312233
IR
418 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
419 {
420 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
421 /* Check if the already created pattern stmt is what we need. */
422 if (!is_gimple_assign (new_stmt)
423 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
424 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
425 return false;
426
3a0a3578 427 VEC_safe_push (gimple, heap, *stmts, def_stmt);
51312233
IR
428 *oprnd = gimple_assign_lhs (new_stmt);
429 }
430 else
431 {
432 /* Create a_T = (NEW_TYPE) a_t; */
433 *oprnd = gimple_assign_rhs1 (def_stmt);
434 tmp = create_tmp_var (new_type, NULL);
435 add_referenced_var (tmp);
436 new_oprnd = make_ssa_name (tmp, NULL);
437 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
438 NULL_TREE);
51312233
IR
439 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
440 VEC_safe_push (gimple, heap, *stmts, def_stmt);
441 *oprnd = new_oprnd;
442 }
443
444 *half_type = new_type;
445 return true;
446}
447
448
20f06221
DN
449/* Function vect_recog_widen_mult_pattern
450
451 Try to find the following pattern:
452
453 type a_t, b_t;
454 TYPE a_T, b_T, prod_T;
455
456 S1 a_t = ;
457 S2 b_t = ;
458 S3 a_T = (TYPE) a_t;
459 S4 b_T = (TYPE) b_t;
460 S5 prod_T = a_T * b_T;
461
462 where type 'TYPE' is at least double the size of type 'type'.
463
383d9c83
IR
464 Also detect unsgigned cases:
465
466 unsigned type a_t, b_t;
467 unsigned TYPE u_prod_T;
468 TYPE a_T, b_T, prod_T;
469
470 S1 a_t = ;
471 S2 b_t = ;
472 S3 a_T = (TYPE) a_t;
473 S4 b_T = (TYPE) b_t;
474 S5 prod_T = a_T * b_T;
475 S6 u_prod_T = (unsigned TYPE) prod_T;
476
477 and multiplication by constants:
478
479 type a_t;
480 TYPE a_T, prod_T;
481
482 S1 a_t = ;
483 S3 a_T = (TYPE) a_t;
484 S5 prod_T = a_T * CONST;
485
51312233
IR
486 A special case of multiplication by constants is when 'TYPE' is 4 times
487 bigger than 'type', but CONST fits an intermediate type 2 times smaller
488 than 'TYPE'. In that case we create an additional pattern stmt for S3
489 to create a variable of the intermediate type, and perform widen-mult
490 on the intermediate type as well:
491
492 type a_t;
493 interm_type a_it;
494 TYPE a_T, prod_T, prod_T';
495
496 S1 a_t = ;
497 S3 a_T = (TYPE) a_t;
498 '--> a_it = (interm_type) a_t;
499 S5 prod_T = a_T * CONST;
500 '--> prod_T' = a_it w* CONST;
20f06221 501
51312233
IR
502 Input/Output:
503
504 * STMTS: Contains a stmt from which the pattern search begins. In the
505 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
506 is detected. In case of unsigned widen-mult, the original stmt (S5) is
507 replaced with S6 in STMTS. In case of multiplication by a constant
508 of an intermediate type (the last case above), STMTS also contains S3
509 (inserted before S5).
20f06221
DN
510
511 Output:
512
513 * TYPE_IN: The type of the input arguments to the pattern.
514
383d9c83 515 * TYPE_OUT: The type of the output of this pattern.
20f06221
DN
516
517 * Return value: A new stmt that will be used to replace the sequence of
383d9c83 518 stmts that constitute the pattern. In this case it will be:
20f06221
DN
519 WIDEN_MULT <a_t, b_t>
520*/
521
726a989a 522static gimple
51312233
IR
523vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
524 tree *type_in, tree *type_out)
20f06221 525{
51312233 526 gimple last_stmt = VEC_pop (gimple, *stmts);
726a989a 527 gimple def_stmt0, def_stmt1;
89d67cca
DN
528 tree oprnd0, oprnd1;
529 tree type, half_type0, half_type1;
726a989a 530 gimple pattern_stmt;
383d9c83 531 tree vectype, vectype_out = NULL_TREE;
89d67cca 532 tree dummy;
726a989a 533 tree var;
89d67cca 534 enum tree_code dummy_code;
5d593372
IR
535 int dummy_int;
536 VEC (tree, heap) *dummy_vec;
36ba4aae 537 bool op1_ok;
89d67cca 538
51312233 539 if (!is_gimple_assign (last_stmt))
89d67cca
DN
540 return NULL;
541
51312233 542 type = gimple_expr_type (last_stmt);
89d67cca
DN
543
544 /* Starting from LAST_STMT, follow the defs of its uses in search
545 of the above pattern. */
546
51312233 547 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
89d67cca
DN
548 return NULL;
549
51312233
IR
550 oprnd0 = gimple_assign_rhs1 (last_stmt);
551 oprnd1 = gimple_assign_rhs2 (last_stmt);
9600efe1
MM
552 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
553 || !types_compatible_p (TREE_TYPE (oprnd1), type))
89d67cca
DN
554 return NULL;
555
383d9c83 556 /* Check argument 0. */
36ba4aae
IR
557 if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
558 return NULL;
383d9c83 559 /* Check argument 1. */
51312233 560 op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);
89d67cca 561
36ba4aae 562 if (op1_ok)
383d9c83
IR
563 {
564 oprnd0 = gimple_assign_rhs1 (def_stmt0);
565 oprnd1 = gimple_assign_rhs1 (def_stmt1);
566 }
36ba4aae 567 else
383d9c83 568 {
51312233 569 if (TREE_CODE (oprnd1) == INTEGER_CST
383d9c83 570 && TREE_CODE (half_type0) == INTEGER_TYPE
36ba4aae
IR
571 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
572 &oprnd0, stmts, type,
573 &half_type0, def_stmt0))
51312233 574 half_type1 = half_type0;
383d9c83
IR
575 else
576 return NULL;
577 }
578
579 /* Handle unsigned case. Look for
580 S6 u_prod_T = (unsigned TYPE) prod_T;
581 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
582 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
583 {
51312233 584 tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
383d9c83
IR
585 imm_use_iterator imm_iter;
586 use_operand_p use_p;
587 int nuses = 0;
588 gimple use_stmt = NULL;
589 tree use_type;
590
591 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
592 return NULL;
593
594 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
595 {
4fb489e7
JJ
596 if (is_gimple_debug (USE_STMT (use_p)))
597 continue;
383d9c83
IR
598 use_stmt = USE_STMT (use_p);
599 nuses++;
600 }
601
602 if (nuses != 1 || !is_gimple_assign (use_stmt)
603 || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
604 return NULL;
605
606 use_lhs = gimple_assign_lhs (use_stmt);
607 use_type = TREE_TYPE (use_lhs);
608 if (!INTEGRAL_TYPE_P (use_type)
609 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
610 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
611 return NULL;
612
613 type = use_type;
51312233 614 last_stmt = use_stmt;
383d9c83 615 }
89d67cca 616
9600efe1 617 if (!types_compatible_p (half_type0, half_type1))
89d67cca
DN
618 return NULL;
619
620 /* Pattern detected. */
621 if (vect_print_dump_info (REPORT_DETAILS))
622 fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");
623
624 /* Check target support */
625 vectype = get_vectype_for_scalar_type (half_type0);
b690cc0f 626 vectype_out = get_vectype_for_scalar_type (type);
03d3e953 627 if (!vectype
d163c4f7 628 || !vectype_out
51312233 629 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
b690cc0f 630 vectype_out, vectype,
726a989a 631 &dummy, &dummy, &dummy_code,
5d593372 632 &dummy_code, &dummy_int, &dummy_vec))
89d67cca
DN
633 return NULL;
634
635 *type_in = vectype;
b690cc0f 636 *type_out = vectype_out;
89d67cca
DN
637
638 /* Pattern supported. Create a stmt to be used to replace the pattern: */
726a989a
RB
639 var = vect_recog_temp_ssa_var (type, NULL);
640 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
641 oprnd1);
726a989a 642
89d67cca 643 if (vect_print_dump_info (REPORT_DETAILS))
726a989a
RB
644 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
645
51312233 646 VEC_safe_push (gimple, heap, *stmts, last_stmt);
726a989a 647 return pattern_stmt;
20f06221
DN
648}
649
650
0b2229b0
RG
651/* Function vect_recog_pow_pattern
652
653 Try to find the following pattern:
654
655 x = POW (y, N);
656
657 with POW being one of pow, powf, powi, powif and N being
658 either 2 or 0.5.
659
660 Input:
661
662 * LAST_STMT: A stmt from which the pattern search begins.
663
664 Output:
665
666 * TYPE_IN: The type of the input arguments to the pattern.
667
668 * TYPE_OUT: The type of the output of this pattern.
669
670 * Return value: A new stmt that will be used to replace the sequence of
671 stmts that constitute the pattern. In this case it will be:
726a989a 672 x = x * x
0b2229b0 673 or
726a989a 674 x = sqrt (x)
0b2229b0
RG
675*/
676
726a989a 677static gimple
51312233
IR
678vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
679 tree *type_out)
0b2229b0 680{
51312233 681 gimple last_stmt = VEC_index (gimple, *stmts, 0);
726a989a
RB
682 tree fn, base, exp = NULL;
683 gimple stmt;
684 tree var;
0b2229b0 685
51312233 686 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
0b2229b0
RG
687 return NULL;
688
51312233 689 fn = gimple_call_fndecl (last_stmt);
52bd463c
RG
690 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
691 return NULL;
692
0b2229b0
RG
693 switch (DECL_FUNCTION_CODE (fn))
694 {
695 case BUILT_IN_POWIF:
696 case BUILT_IN_POWI:
697 case BUILT_IN_POWF:
698 case BUILT_IN_POW:
51312233
IR
699 base = gimple_call_arg (last_stmt, 0);
700 exp = gimple_call_arg (last_stmt, 1);
0b2229b0
RG
701 if (TREE_CODE (exp) != REAL_CST
702 && TREE_CODE (exp) != INTEGER_CST)
726a989a 703 return NULL;
0b2229b0
RG
704 break;
705
726a989a
RB
706 default:
707 return NULL;
0b2229b0
RG
708 }
709
710 /* We now have a pow or powi builtin function call with a constant
711 exponent. */
712
0b2229b0
RG
713 *type_out = NULL_TREE;
714
715 /* Catch squaring. */
716 if ((host_integerp (exp, 0)
717 && tree_low_cst (exp, 0) == 2)
718 || (TREE_CODE (exp) == REAL_CST
719 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
c6b1b49b
RG
720 {
721 *type_in = TREE_TYPE (base);
726a989a
RB
722
723 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
724 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
726a989a 725 return stmt;
c6b1b49b 726 }
0b2229b0
RG
727
728 /* Catch square root. */
729 if (TREE_CODE (exp) == REAL_CST
730 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
731 {
732 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
c6b1b49b
RG
733 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
734 if (*type_in)
735 {
726a989a
RB
736 gimple stmt = gimple_build_call (newfn, 1, base);
737 if (vectorizable_function (stmt, *type_in, *type_in)
738 != NULL_TREE)
739 {
740 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
b8698a0f 741 gimple_call_set_lhs (stmt, var);
726a989a
RB
742 return stmt;
743 }
c6b1b49b 744 }
0b2229b0
RG
745 }
746
726a989a 747 return NULL;
0b2229b0
RG
748}
749
750
20f06221
DN
751/* Function vect_recog_widen_sum_pattern
752
753 Try to find the following pattern:
754
b8698a0f 755 type x_t;
20f06221
DN
756 TYPE x_T, sum = init;
757 loop:
758 sum_0 = phi <init, sum_1>
759 S1 x_t = *p;
760 S2 x_T = (TYPE) x_t;
761 S3 sum_1 = x_T + sum_0;
762
b8698a0f 763 where type 'TYPE' is at least double the size of type 'type', i.e - we're
20f06221 764 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
917f1b7e 765 a special case of a reduction computation.
20f06221
DN
766
767 Input:
768
769 * LAST_STMT: A stmt from which the pattern search begins. In the example,
770 when this function is called with S3, the pattern {S2,S3} will be detected.
b8698a0f 771
20f06221 772 Output:
b8698a0f 773
20f06221
DN
774 * TYPE_IN: The type of the input arguments to the pattern.
775
776 * TYPE_OUT: The type of the output of this pattern.
777
778 * Return value: A new stmt that will be used to replace the sequence of
779 stmts that constitute the pattern. In this case it will be:
780 WIDEN_SUM <x_t, sum_0>
d29de1bf 781
b8698a0f 782 Note: The widening-sum idiom is a widening reduction pattern that is
d29de1bf 783 vectorized without preserving all the intermediate results. It
b8698a0f
L
784 produces only N/2 (widened) results (by summing up pairs of
785 intermediate results) rather than all N results. Therefore, we
786 cannot allow this pattern when we want to get all the results and in
787 the correct order (as is the case when this computation is in an
d29de1bf 788 inner-loop nested in an outer-loop that us being vectorized). */
20f06221 789
726a989a 790static gimple
51312233
IR
791vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
792 tree *type_out)
20f06221 793{
51312233 794 gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
20f06221 795 tree oprnd0, oprnd1;
51312233 796 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
20f06221 797 tree type, half_type;
726a989a 798 gimple pattern_stmt;
d29de1bf
DN
799 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
800 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
726a989a 801 tree var;
20f06221 802
51312233 803 if (!is_gimple_assign (last_stmt))
20f06221
DN
804 return NULL;
805
51312233 806 type = gimple_expr_type (last_stmt);
20f06221
DN
807
808 /* Look for the following pattern
809 DX = (TYPE) X;
810 sum_1 = DX + sum_0;
811 In which DX is at least double the size of X, and sum_1 has been
812 recognized as a reduction variable.
813 */
814
815 /* Starting from LAST_STMT, follow the defs of its uses in search
816 of the above pattern. */
817
51312233 818 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
20f06221
DN
819 return NULL;
820
821 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
822 return NULL;
823
51312233
IR
824 oprnd0 = gimple_assign_rhs1 (last_stmt);
825 oprnd1 = gimple_assign_rhs2 (last_stmt);
9600efe1
MM
826 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
827 || !types_compatible_p (TREE_TYPE (oprnd1), type))
20f06221
DN
828 return NULL;
829
51312233 830 /* So far so good. Since last_stmt was detected as a (summation) reduction,
20f06221
DN
831 we know that oprnd1 is the reduction variable (defined by a loop-header
832 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
833 Left to check that oprnd0 is defined by a cast from type 'type' to type
834 'TYPE'. */
835
51312233 836 if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
20f06221
DN
837 return NULL;
838
726a989a 839 oprnd0 = gimple_assign_rhs1 (stmt);
20f06221
DN
840 *type_in = half_type;
841 *type_out = type;
842
843 /* Pattern detected. Create a stmt to be used to replace the pattern: */
726a989a
RB
844 var = vect_recog_temp_ssa_var (type, NULL);
845 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
846 oprnd0, oprnd1);
726a989a 847
20f06221
DN
848 if (vect_print_dump_info (REPORT_DETAILS))
849 {
850 fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
726a989a 851 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
20f06221 852 }
d29de1bf
DN
853
854 /* We don't allow changing the order of the computation in the inner-loop
855 when doing outer-loop vectorization. */
51312233 856 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
d29de1bf 857
726a989a 858 return pattern_stmt;
20f06221
DN
859}
860
861
1107f3ae
IR
862/* Return TRUE if the operation in STMT can be performed on a smaller type.
863
864 Input:
865 STMT - a statement to check.
866 DEF - we support operations with two operands, one of which is constant.
867 The other operand can be defined by a demotion operation, or by a
868 previous statement in a sequence of over-promoted operations. In the
869 later case DEF is used to replace that operand. (It is defined by a
870 pattern statement we created for the previous statement in the
871 sequence).
872
873 Input/output:
874 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
875 NULL, it's the type of DEF.
876 STMTS - additional pattern statements. If a pattern statement (type
877 conversion) is created in this function, its original statement is
878 added to STMTS.
879
880 Output:
881 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
882 operands to use in the new pattern statement for STMT (will be created
883 in vect_recog_over_widening_pattern ()).
884 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
885 statements for STMT: the first one is a type promotion and the second
886 one is the operation itself. We return the type promotion statement
363477c0 887 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1107f3ae
IR
888 the second pattern statement. */
889
890static bool
891vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
892 tree *op0, tree *op1, gimple *new_def_stmt,
893 VEC (gimple, heap) **stmts)
894{
895 enum tree_code code;
896 tree const_oprnd, oprnd;
897 tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
898 gimple def_stmt, new_stmt;
899 bool first = false;
ad949bcc
JJ
900 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
901 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
1107f3ae 902
d6e1acf6
JJ
903 *op0 = NULL_TREE;
904 *op1 = NULL_TREE;
1107f3ae
IR
905 *new_def_stmt = NULL;
906
907 if (!is_gimple_assign (stmt))
908 return false;
909
910 code = gimple_assign_rhs_code (stmt);
911 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
912 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
913 return false;
914
915 oprnd = gimple_assign_rhs1 (stmt);
916 const_oprnd = gimple_assign_rhs2 (stmt);
917 type = gimple_expr_type (stmt);
918
919 if (TREE_CODE (oprnd) != SSA_NAME
920 || TREE_CODE (const_oprnd) != INTEGER_CST)
921 return false;
922
923 /* If we are in the middle of a sequence, we use DEF from a previous
924 statement. Otherwise, OPRND has to be a result of type promotion. */
925 if (*new_type)
926 {
927 half_type = *new_type;
928 oprnd = def;
929 }
930 else
931 {
932 first = true;
fb2c2b16 933 if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
ad949bcc
JJ
934 || !gimple_bb (def_stmt)
935 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
fb2c2b16 936 || !vinfo_for_stmt (def_stmt))
1107f3ae
IR
937 return false;
938 }
939
940 /* Can we perform the operation on a smaller type? */
941 switch (code)
942 {
943 case BIT_IOR_EXPR:
944 case BIT_XOR_EXPR:
945 case BIT_AND_EXPR:
946 if (!int_fits_type_p (const_oprnd, half_type))
947 {
948 /* HALF_TYPE is not enough. Try a bigger type if possible. */
949 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
950 return false;
951
952 interm_type = build_nonstandard_integer_type (
953 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
954 if (!int_fits_type_p (const_oprnd, interm_type))
955 return false;
956 }
957
958 break;
959
960 case LSHIFT_EXPR:
961 /* Try intermediate type - HALF_TYPE is not enough for sure. */
962 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
963 return false;
964
965 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
966 (e.g., if the original value was char, the shift amount is at most 8
967 if we want to use short). */
968 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
969 return false;
970
971 interm_type = build_nonstandard_integer_type (
972 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
973
974 if (!vect_supportable_shift (code, interm_type))
975 return false;
976
977 break;
978
979 case RSHIFT_EXPR:
980 if (vect_supportable_shift (code, half_type))
981 break;
982
983 /* Try intermediate type - HALF_TYPE is not supported. */
984 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
985 return false;
986
987 interm_type = build_nonstandard_integer_type (
988 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
989
990 if (!vect_supportable_shift (code, interm_type))
991 return false;
992
993 break;
994
995 default:
996 gcc_unreachable ();
997 }
998
999 /* There are four possible cases:
1000 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1001 the first statement in the sequence)
1002 a. The original, HALF_TYPE, is not enough - we replace the promotion
1003 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1004 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1005 promotion.
1006 2. OPRND is defined by a pattern statement we created.
1007 a. Its type is not sufficient for the operation, we create a new stmt:
1008 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1009 this statement in NEW_DEF_STMT, and it is later put in
363477c0 1010 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1107f3ae
IR
1011 b. OPRND is good to use in the new statement. */
1012 if (first)
1013 {
1014 if (interm_type)
1015 {
1016 /* Replace the original type conversion HALF_TYPE->TYPE with
1017 HALF_TYPE->INTERM_TYPE. */
1018 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1019 {
1020 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1021 /* Check if the already created pattern stmt is what we need. */
1022 if (!is_gimple_assign (new_stmt)
1023 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
1024 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1025 return false;
1026
aede1227 1027 VEC_safe_push (gimple, heap, *stmts, def_stmt);
1107f3ae
IR
1028 oprnd = gimple_assign_lhs (new_stmt);
1029 }
1030 else
1031 {
1032 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1033 oprnd = gimple_assign_rhs1 (def_stmt);
1034 tmp = create_tmp_reg (interm_type, NULL);
1035 add_referenced_var (tmp);
1036 new_oprnd = make_ssa_name (tmp, NULL);
1037 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1038 oprnd, NULL_TREE);
1107f3ae
IR
1039 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1040 VEC_safe_push (gimple, heap, *stmts, def_stmt);
1041 oprnd = new_oprnd;
1042 }
1043 }
1044 else
1045 {
1046 /* Retrieve the operand before the type promotion. */
1047 oprnd = gimple_assign_rhs1 (def_stmt);
1048 }
1049 }
1050 else
1051 {
1052 if (interm_type)
1053 {
1054 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1055 tmp = create_tmp_reg (interm_type, NULL);
1056 add_referenced_var (tmp);
1057 new_oprnd = make_ssa_name (tmp, NULL);
1058 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1059 oprnd, NULL_TREE);
1107f3ae
IR
1060 oprnd = new_oprnd;
1061 *new_def_stmt = new_stmt;
1062 }
1063
1064 /* Otherwise, OPRND is already set. */
1065 }
1066
1067 if (interm_type)
1068 *new_type = interm_type;
1069 else
1070 *new_type = half_type;
1071
1072 *op0 = oprnd;
1073 *op1 = fold_convert (*new_type, const_oprnd);
1074
1075 return true;
1076}
1077
1078
1079/* Try to find a statement or a sequence of statements that can be performed
1080 on a smaller type:
1081
1082 type x_t;
1083 TYPE x_T, res0_T, res1_T;
1084 loop:
1085 S1 x_t = *p;
1086 S2 x_T = (TYPE) x_t;
1087 S3 res0_T = op (x_T, C0);
1088 S4 res1_T = op (res0_T, C1);
1089 S5 ... = () res1_T; - type demotion
1090
1091 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1092 constants.
1093 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1094 be 'type' or some intermediate type. For now, we expect S5 to be a type
71c92d17 1095 demotion operation. We also check that S3 and S4 have only one use. */
1107f3ae 1096
1107f3ae
IR
1097static gimple
1098vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
1099 tree *type_in, tree *type_out)
1100{
1101 gimple stmt = VEC_pop (gimple, *stmts);
1102 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1103 tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type;
1104 imm_use_iterator imm_iter;
1105 use_operand_p use_p;
1106 int nuses = 0;
1107 tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
1108 bool first;
1109 struct loop *loop = (gimple_bb (stmt))->loop_father;
b2a1a74d 1110 tree type = NULL;
1107f3ae
IR
1111
1112 first = true;
1113 while (1)
1114 {
1115 if (!vinfo_for_stmt (stmt)
1116 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1117 return NULL;
1118
1119 new_def_stmt = NULL;
1120 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1121 &op0, &op1, &new_def_stmt,
1122 stmts))
1123 {
1124 if (first)
1125 return NULL;
1126 else
1127 break;
1128 }
1129
1130 /* STMT can be performed on a smaller type. Check its uses. */
1131 lhs = gimple_assign_lhs (stmt);
1132 nuses = 0;
1133 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1134 {
1135 if (is_gimple_debug (USE_STMT (use_p)))
1136 continue;
1137 use_stmt = USE_STMT (use_p);
1138 nuses++;
1139 }
1140
1141 if (nuses != 1 || !is_gimple_assign (use_stmt)
1142 || !gimple_bb (use_stmt)
1143 || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1144 return NULL;
1145
1146 /* Create pattern statement for STMT. */
1147 vectype = get_vectype_for_scalar_type (new_type);
1148 if (!vectype)
1149 return NULL;
1150
1151 /* We want to collect all the statements for which we create pattern
1152 statetments, except for the case when the last statement in the
1153 sequence doesn't have a corresponding pattern statement. In such
1154 case we associate the last pattern statement with the last statement
36ba4aae 1155 in the sequence. Therefore, we only add the original statement to
1107f3ae
IR
1156 the list if we know that it is not the last. */
1157 if (prev_stmt)
1158 VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1159
1160 var = vect_recog_temp_ssa_var (new_type, NULL);
62371b92
JJ
1161 pattern_stmt
1162 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
1163 op0, op1);
1107f3ae 1164 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
083481d8 1165 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1107f3ae
IR
1166
1167 if (vect_print_dump_info (REPORT_DETAILS))
1168 {
1169 fprintf (vect_dump, "created pattern stmt: ");
1170 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1171 }
1172
b2a1a74d 1173 type = gimple_expr_type (stmt);
1107f3ae
IR
1174 prev_stmt = stmt;
1175 stmt = use_stmt;
1176
1177 first = false;
1178 }
1179
1180 /* We got a sequence. We expect it to end with a type demotion operation.
1181 Otherwise, we quit (for now). There are three possible cases: the
1182 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1183 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1184 NEW_TYPE differs (we create a new conversion statement). */
1185 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1186 {
1187 use_lhs = gimple_assign_lhs (use_stmt);
1188 use_type = TREE_TYPE (use_lhs);
b2a1a74d
IR
1189 /* Support only type promotion or signedess change. Check that USE_TYPE
1190 is not bigger than the original type. */
1107f3ae 1191 if (!INTEGRAL_TYPE_P (use_type)
b2a1a74d
IR
1192 || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)
1193 || TYPE_PRECISION (type) < TYPE_PRECISION (use_type))
1107f3ae
IR
1194 return NULL;
1195
1196 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1197 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1198 {
1199 /* Create NEW_TYPE->USE_TYPE conversion. */
1200 tmp = create_tmp_reg (use_type, NULL);
1201 add_referenced_var (tmp);
1202 new_oprnd = make_ssa_name (tmp, NULL);
1203 pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1204 var, NULL_TREE);
1107f3ae
IR
1205 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1206
1207 *type_in = get_vectype_for_scalar_type (new_type);
1208 *type_out = get_vectype_for_scalar_type (use_type);
1209
1210 /* We created a pattern statement for the last statement in the
1211 sequence, so we don't need to associate it with the pattern
1212 statement created for PREV_STMT. Therefore, we add PREV_STMT
1213 to the list in order to mark it later in vect_pattern_recog_1. */
1214 if (prev_stmt)
1215 VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1216 }
1217 else
1218 {
1219 if (prev_stmt)
363477c0
JJ
1220 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1221 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1107f3ae
IR
1222
1223 *type_in = vectype;
1224 *type_out = NULL_TREE;
1225 }
1226
1227 VEC_safe_push (gimple, heap, *stmts, use_stmt);
1228 }
1229 else
1230 /* TODO: support general case, create a conversion to the correct type. */
1231 return NULL;
1232
1233 /* Pattern detected. */
1234 if (vect_print_dump_info (REPORT_DETAILS))
1235 {
1236 fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
1237 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1238 }
1239
1240 return pattern_stmt;
1241}
1242
36ba4aae
IR
1243/* Detect widening shift pattern:
1244
1245 type a_t;
1246 TYPE a_T, res_T;
1247
1248 S1 a_t = ;
1249 S2 a_T = (TYPE) a_t;
1250 S3 res_T = a_T << CONST;
1251
1252 where type 'TYPE' is at least double the size of type 'type'.
1253
5bfdb7d8 1254 Also detect unsigned cases:
36ba4aae
IR
1255
1256 unsigned type a_t;
1257 unsigned TYPE u_res_T;
1258 TYPE a_T, res_T;
1259
1260 S1 a_t = ;
1261 S2 a_T = (TYPE) a_t;
1262 S3 res_T = a_T << CONST;
1263 S4 u_res_T = (unsigned TYPE) res_T;
1264
1265 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1266 create an additional pattern stmt for S2 to create a variable of an
1267 intermediate type, and perform widen-shift on the intermediate type:
1268
1269 type a_t;
1270 interm_type a_it;
1271 TYPE a_T, res_T, res_T';
1272
1273 S1 a_t = ;
1274 S2 a_T = (TYPE) a_t;
1275 '--> a_it = (interm_type) a_t;
1276 S3 res_T = a_T << CONST;
1277 '--> res_T' = a_it <<* CONST;
1278
1279 Input/Output:
1280
1281 * STMTS: Contains a stmt from which the pattern search begins.
1282 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1283 in STMTS. When an intermediate type is used and a pattern statement is
1284 created for S2, we also put S2 here (before S3).
1285
1286 Output:
1287
1288 * TYPE_IN: The type of the input arguments to the pattern.
1289
1290 * TYPE_OUT: The type of the output of this pattern.
1291
1292 * Return value: A new stmt that will be used to replace the sequence of
1293 stmts that constitute the pattern. In this case it will be:
1294 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1295
1296static gimple
1297vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
1298 tree *type_in, tree *type_out)
1299{
1300 gimple last_stmt = VEC_pop (gimple, *stmts);
1301 gimple def_stmt0;
1302 tree oprnd0, oprnd1;
1303 tree type, half_type0;
1304 gimple pattern_stmt, orig_stmt = NULL;
1305 tree vectype, vectype_out = NULL_TREE;
1306 tree dummy;
1307 tree var;
1308 enum tree_code dummy_code;
1309 int dummy_int;
1310 VEC (tree, heap) * dummy_vec;
1311 gimple use_stmt = NULL;
1312 bool over_widen = false;
1313
1314 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1315 return NULL;
1316
1317 orig_stmt = last_stmt;
1318 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1319 {
1320 /* This statement was also detected as over-widening operation (it can't
1321 be any other pattern, because only over-widening detects shifts).
1322 LAST_STMT is the final type demotion statement, but its related
1323 statement is shift. We analyze the related statement to catch cases:
1324
1325 orig code:
1326 type a_t;
1327 itype res;
1328 TYPE a_T, res_T;
1329
1330 S1 a_T = (TYPE) a_t;
1331 S2 res_T = a_T << CONST;
1332 S3 res = (itype)res_T;
1333
1334 (size of type * 2 <= size of itype
1335 and size of itype * 2 <= size of TYPE)
1336
1337 code after over-widening pattern detection:
1338
1339 S1 a_T = (TYPE) a_t;
1340 --> a_it = (itype) a_t;
1341 S2 res_T = a_T << CONST;
1342 S3 res = (itype)res_T; <--- LAST_STMT
1343 --> res = a_it << CONST;
1344
1345 after widen_shift:
1346
1347 S1 a_T = (TYPE) a_t;
1348 --> a_it = (itype) a_t; - redundant
1349 S2 res_T = a_T << CONST;
1350 S3 res = (itype)res_T;
1351 --> res = a_t w<< CONST;
1352
1353 i.e., we replace the three statements with res = a_t w<< CONST. */
1354 last_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_stmt));
1355 over_widen = true;
1356 }
1357
1358 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1359 return NULL;
1360
1361 oprnd0 = gimple_assign_rhs1 (last_stmt);
1362 oprnd1 = gimple_assign_rhs2 (last_stmt);
1363 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1364 return NULL;
1365
1366 /* Check operand 0: it has to be defined by a type promotion. */
1367 if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
1368 return NULL;
1369
1370 /* Check operand 1: has to be positive. We check that it fits the type
1371 in vect_handle_widen_op_by_const (). */
1372 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1373 return NULL;
1374
1375 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1376 type = gimple_expr_type (last_stmt);
1377
1378 /* Check if this a widening operation. */
1379 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1380 &oprnd0, stmts,
1381 type, &half_type0, def_stmt0))
1382 return NULL;
1383
1384 /* Handle unsigned case. Look for
1385 S4 u_res_T = (unsigned TYPE) res_T;
1386 Use unsigned TYPE as the type for WIDEN_LSHIFT_EXPR. */
1387 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
1388 {
1389 tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
1390 imm_use_iterator imm_iter;
1391 use_operand_p use_p;
1392 int nuses = 0;
1393 tree use_type;
1394
1395 if (over_widen)
1396 {
1397 /* In case of over-widening pattern, S4 should be ORIG_STMT itself.
1398 We check here that TYPE is the correct type for the operation,
1399 i.e., it's the type of the original result. */
1400 tree orig_type = gimple_expr_type (orig_stmt);
1401 if ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (orig_type))
1402 || (TYPE_PRECISION (type) != TYPE_PRECISION (orig_type)))
1403 return NULL;
1404 }
1405 else
1406 {
1407 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1408 {
1409 if (is_gimple_debug (USE_STMT (use_p)))
1410 continue;
1411 use_stmt = USE_STMT (use_p);
1412 nuses++;
1413 }
1414
1415 if (nuses != 1 || !is_gimple_assign (use_stmt)
1416 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1417 return NULL;
1418
1419 use_lhs = gimple_assign_lhs (use_stmt);
1420 use_type = TREE_TYPE (use_lhs);
1421
1422 if (!INTEGRAL_TYPE_P (use_type)
1423 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
1424 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
1425 return NULL;
1426
1427 type = use_type;
1428 }
1429 }
1430
1431 /* Pattern detected. */
1432 if (vect_print_dump_info (REPORT_DETAILS))
1433 fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");
1434
1435 /* Check target support. */
1436 vectype = get_vectype_for_scalar_type (half_type0);
1437 vectype_out = get_vectype_for_scalar_type (type);
1438
1439 if (!vectype
1440 || !vectype_out
1441 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1442 vectype_out, vectype,
1443 &dummy, &dummy, &dummy_code,
1444 &dummy_code, &dummy_int,
1445 &dummy_vec))
1446 return NULL;
1447
1448 *type_in = vectype;
1449 *type_out = vectype_out;
1450
1451 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1452 var = vect_recog_temp_ssa_var (type, NULL);
1453 pattern_stmt =
1454 gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);
1455
1456 if (vect_print_dump_info (REPORT_DETAILS))
1457 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1458
1459 if (use_stmt)
1460 last_stmt = use_stmt;
1461 else
1462 last_stmt = orig_stmt;
1463
1464 VEC_safe_push (gimple, heap, *stmts, last_stmt);
1465 return pattern_stmt;
1466}
1107f3ae 1467
732a0ad3
JJ
1468/* Detect a vector by vector shift pattern that wouldn't be otherwise
1469 vectorized:
1470
1471 type a_t;
1472 TYPE b_T, res_T;
1473
1474 S1 a_t = ;
1475 S2 b_T = ;
1476 S3 res_T = b_T op a_t;
1477
1478 where type 'TYPE' is a type with different size than 'type',
1479 and op is <<, >> or rotate.
1480
1481 Also detect cases:
1482
1483 type a_t;
1484 TYPE b_T, c_T, res_T;
1485
1486 S0 c_T = ;
1487 S1 a_t = (type) c_T;
1488 S2 b_T = ;
1489 S3 res_T = b_T op a_t;
1490
1491 Input/Output:
1492
1493 * STMTS: Contains a stmt from which the pattern search begins,
1494 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
1495 with a shift/rotate which has same type on both operands, in the
1496 second case just b_T op c_T, in the first case with added cast
363477c0 1497 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
732a0ad3
JJ
1498
1499 Output:
1500
1501 * TYPE_IN: The type of the input arguments to the pattern.
1502
1503 * TYPE_OUT: The type of the output of this pattern.
1504
1505 * Return value: A new stmt that will be used to replace the shift/rotate
1506 S3 stmt. */
1507
1508static gimple
1509vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
1510 tree *type_in, tree *type_out)
1511{
1512 gimple last_stmt = VEC_pop (gimple, *stmts);
1513 tree oprnd0, oprnd1, lhs, var;
1514 gimple pattern_stmt, def_stmt;
1515 enum tree_code rhs_code;
1516 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1517 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1518 enum vect_def_type dt;
1519 tree def;
1520
1521 if (!is_gimple_assign (last_stmt))
1522 return NULL;
1523
1524 rhs_code = gimple_assign_rhs_code (last_stmt);
1525 switch (rhs_code)
1526 {
1527 case LSHIFT_EXPR:
1528 case RSHIFT_EXPR:
1529 case LROTATE_EXPR:
1530 case RROTATE_EXPR:
1531 break;
1532 default:
1533 return NULL;
1534 }
1535
1536 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1537 return NULL;
1538
1539 lhs = gimple_assign_lhs (last_stmt);
1540 oprnd0 = gimple_assign_rhs1 (last_stmt);
1541 oprnd1 = gimple_assign_rhs2 (last_stmt);
1542 if (TREE_CODE (oprnd0) != SSA_NAME
1543 || TREE_CODE (oprnd1) != SSA_NAME
1544 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
1545 || TYPE_PRECISION (TREE_TYPE (oprnd1))
1546 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
1547 || TYPE_PRECISION (TREE_TYPE (lhs))
1548 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
1549 return NULL;
1550
1551 if (!vect_is_simple_use (oprnd1, loop_vinfo, NULL, &def_stmt, &def, &dt))
1552 return NULL;
1553
1554 if (dt != vect_internal_def)
1555 return NULL;
1556
1557 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
1558 *type_out = *type_in;
1559 if (*type_in == NULL_TREE)
1560 return NULL;
1561
1562 def = NULL_TREE;
1563 if (gimple_assign_cast_p (def_stmt))
1564 {
1565 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1566 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
1567 && TYPE_PRECISION (TREE_TYPE (rhs1))
1568 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
1569 def = rhs1;
1570 }
1571
1572 if (def == NULL_TREE)
1573 {
1574 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1575 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1576 NULL_TREE);
083481d8 1577 new_pattern_def_seq (stmt_vinfo, def_stmt);
732a0ad3
JJ
1578 }
1579
1580 /* Pattern detected. */
1581 if (vect_print_dump_info (REPORT_DETAILS))
1582 fprintf (vect_dump, "vect_recog_vector_vector_shift_pattern: detected: ");
1583
1584 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1585 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1586 pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
1587
1588 if (vect_print_dump_info (REPORT_DETAILS))
1589 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1590
1591 VEC_safe_push (gimple, heap, *stmts, last_stmt);
1592 return pattern_stmt;
1593}
1594
363477c0
JJ
1595/* Detect a signed division by power of two constant that wouldn't be
1596 otherwise vectorized:
1597
1598 type a_t, b_t;
1599
1600 S1 a_t = b_t / N;
1601
1602 where type 'type' is a signed integral type and N is a constant positive
1603 power of two.
1604
1605 Similarly handle signed modulo by power of two constant:
1606
1607 S4 a_t = b_t % N;
1608
1609 Input/Output:
1610
1611 * STMTS: Contains a stmt from which the pattern search begins,
1612 i.e. the division stmt. S1 is replaced by:
1613 S3 y_t = b_t < 0 ? N - 1 : 0;
1614 S2 x_t = b_t + y_t;
1615 S1' a_t = x_t >> log2 (N);
1616
1617 S4 is replaced by (where *_T temporaries have unsigned type):
1618 S9 y_T = b_t < 0 ? -1U : 0U;
1619 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1620 S7 z_t = (type) z_T;
1621 S6 w_t = b_t + z_t;
1622 S5 x_t = w_t & (N - 1);
1623 S4' a_t = x_t - z_t;
1624
1625 Output:
1626
1627 * TYPE_IN: The type of the input arguments to the pattern.
1628
1629 * TYPE_OUT: The type of the output of this pattern.
1630
1631 * Return value: A new stmt that will be used to replace the division
1632 S1 or modulo S4 stmt. */
1633
1634static gimple
1635vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
1636 tree *type_in, tree *type_out)
1637{
1638 gimple last_stmt = VEC_pop (gimple, *stmts);
1639 tree oprnd0, oprnd1, vectype, itype, cond;
1640 gimple pattern_stmt, def_stmt;
1641 enum tree_code rhs_code;
1642 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1643 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1644 optab optab;
1645
1646 if (!is_gimple_assign (last_stmt))
1647 return NULL;
1648
1649 rhs_code = gimple_assign_rhs_code (last_stmt);
1650 switch (rhs_code)
1651 {
1652 case TRUNC_DIV_EXPR:
1653 case TRUNC_MOD_EXPR:
1654 break;
1655 default:
1656 return NULL;
1657 }
1658
1659 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1660 return NULL;
1661
1662 oprnd0 = gimple_assign_rhs1 (last_stmt);
1663 oprnd1 = gimple_assign_rhs2 (last_stmt);
1664 itype = TREE_TYPE (oprnd0);
1665 if (TREE_CODE (oprnd0) != SSA_NAME
1666 || TREE_CODE (oprnd1) != INTEGER_CST
1667 || TREE_CODE (itype) != INTEGER_TYPE
1668 || TYPE_UNSIGNED (itype)
1669 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))
1670 || !integer_pow2p (oprnd1)
1671 || tree_int_cst_sgn (oprnd1) != 1)
1672 return NULL;
1673
1674 vectype = get_vectype_for_scalar_type (itype);
1675 if (vectype == NULL_TREE)
1676 return NULL;
1677
1678 /* If the target can handle vectorized division or modulo natively,
1679 don't attempt to optimize this. */
1680 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
1681 if (optab != NULL)
1682 {
1683 enum machine_mode vec_mode = TYPE_MODE (vectype);
1684 int icode = (int) optab_handler (optab, vec_mode);
1685 if (icode != CODE_FOR_nothing
1686 || GET_MODE_SIZE (vec_mode) == UNITS_PER_WORD)
1687 return NULL;
1688 }
1689
1690 /* Pattern detected. */
1691 if (vect_print_dump_info (REPORT_DETAILS))
1692 fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: ");
1693
1694 cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst (itype, 0));
1695 if (rhs_code == TRUNC_DIV_EXPR)
1696 {
1697 tree var = vect_recog_temp_ssa_var (itype, NULL);
1698 def_stmt
1699 = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
1700 fold_build2 (MINUS_EXPR, itype,
1701 oprnd1,
1702 build_int_cst (itype,
1703 1)),
1704 build_int_cst (itype, 0));
083481d8 1705 new_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0
JJ
1706 var = vect_recog_temp_ssa_var (itype, NULL);
1707 def_stmt
1708 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
1709 gimple_assign_lhs (def_stmt));
083481d8 1710 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0
JJ
1711
1712 pattern_stmt
1713 = gimple_build_assign_with_ops (RSHIFT_EXPR,
1714 vect_recog_temp_ssa_var (itype, NULL),
1715 var,
1716 build_int_cst (itype,
1717 tree_log2 (oprnd1)));
1718 }
1719 else
1720 {
1721 tree signmask;
1722 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1723 if (compare_tree_int (oprnd1, 2) == 0)
1724 {
1725 signmask = vect_recog_temp_ssa_var (itype, NULL);
1726 def_stmt
1727 = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond,
1728 build_int_cst (itype, 1),
1729 build_int_cst (itype, 0));
083481d8 1730 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0
JJ
1731 }
1732 else
1733 {
1734 tree utype
1735 = build_nonstandard_integer_type (TYPE_PRECISION (itype), 1);
1736 tree vecutype = get_vectype_for_scalar_type (utype);
1737 tree shift
1738 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
1739 - tree_log2 (oprnd1));
1740 tree var = vect_recog_temp_ssa_var (utype, NULL);
1741 stmt_vec_info def_stmt_vinfo;
1742
1743 def_stmt
1744 = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
1745 build_int_cst (utype, -1),
1746 build_int_cst (utype, 0));
1747 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1748 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1749 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
083481d8 1750 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0
JJ
1751 var = vect_recog_temp_ssa_var (utype, NULL);
1752 def_stmt
1753 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
1754 gimple_assign_lhs (def_stmt),
1755 shift);
1756 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1757 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1758 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
083481d8 1759 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0
JJ
1760 signmask = vect_recog_temp_ssa_var (itype, NULL);
1761 def_stmt
1762 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
1763 NULL_TREE);
083481d8 1764 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0
JJ
1765 }
1766 def_stmt
1767 = gimple_build_assign_with_ops (PLUS_EXPR,
1768 vect_recog_temp_ssa_var (itype, NULL),
1769 oprnd0, signmask);
083481d8 1770 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0
JJ
1771 def_stmt
1772 = gimple_build_assign_with_ops (BIT_AND_EXPR,
1773 vect_recog_temp_ssa_var (itype, NULL),
1774 gimple_assign_lhs (def_stmt),
1775 fold_build2 (MINUS_EXPR, itype,
1776 oprnd1,
1777 build_int_cst (itype,
1778 1)));
083481d8 1779 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0
JJ
1780
1781 pattern_stmt
1782 = gimple_build_assign_with_ops (MINUS_EXPR,
1783 vect_recog_temp_ssa_var (itype, NULL),
1784 gimple_assign_lhs (def_stmt),
1785 signmask);
1786 }
1787
1788 if (vect_print_dump_info (REPORT_DETAILS))
1789 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1790
1791 VEC_safe_push (gimple, heap, *stmts, last_stmt);
1792
1793 *type_in = vectype;
1794 *type_out = vectype;
1795 return pattern_stmt;
1796}
1797
69d2aade
JJ
1798/* Function vect_recog_mixed_size_cond_pattern
1799
1800 Try to find the following pattern:
1801
1802 type x_t, y_t;
1803 TYPE a_T, b_T, c_T;
1804 loop:
1805 S1 a_T = x_t CMP y_t ? b_T : c_T;
1806
1807 where type 'TYPE' is an integral type which has different size
1808 from 'type'. b_T and c_T are constants and if 'TYPE' is wider
1809 than 'type', the constants need to fit into an integer type
1810 with the same width as 'type'.
1811
1812 Input:
1813
1814 * LAST_STMT: A stmt from which the pattern search begins.
1815
1816 Output:
1817
1818 * TYPE_IN: The type of the input arguments to the pattern.
1819
1820 * TYPE_OUT: The type of the output of this pattern.
1821
1822 * Return value: A new stmt that will be used to replace the pattern.
1823 Additionally a def_stmt is added.
1824
1825 a_it = x_t CMP y_t ? b_it : c_it;
1826 a_T = (TYPE) a_it; */
1827
1828static gimple
1829vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
1830 tree *type_out)
1831{
1832 gimple last_stmt = VEC_index (gimple, *stmts, 0);
1833 tree cond_expr, then_clause, else_clause;
1834 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
1835 tree type, vectype, comp_vectype, itype, vecitype;
1836 enum machine_mode cmpmode;
1837 gimple pattern_stmt, def_stmt;
1838 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1839
1840 if (!is_gimple_assign (last_stmt)
1841 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
1842 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
1843 return NULL;
1844
1845 cond_expr = gimple_assign_rhs1 (last_stmt);
1846 then_clause = gimple_assign_rhs2 (last_stmt);
1847 else_clause = gimple_assign_rhs3 (last_stmt);
1848
1849 if (TREE_CODE (then_clause) != INTEGER_CST
1850 || TREE_CODE (else_clause) != INTEGER_CST)
1851 return NULL;
1852
87aab9b2
JJ
1853 if (!COMPARISON_CLASS_P (cond_expr))
1854 return NULL;
1855
1856 comp_vectype
1857 = get_vectype_for_scalar_type (TREE_TYPE (TREE_OPERAND (cond_expr, 0)));
1858 if (comp_vectype == NULL_TREE)
69d2aade
JJ
1859 return NULL;
1860
1861 type = gimple_expr_type (last_stmt);
1862 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
1863
1864 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
1865 return NULL;
1866
1867 vectype = get_vectype_for_scalar_type (type);
1868 if (vectype == NULL_TREE)
1869 return NULL;
1870
1871 if (expand_vec_cond_expr_p (vectype, comp_vectype))
1872 return NULL;
1873
1874 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
1875 TYPE_UNSIGNED (type));
1876 if (itype == NULL_TREE
1877 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
1878 return NULL;
1879
1880 vecitype = get_vectype_for_scalar_type (itype);
1881 if (vecitype == NULL_TREE)
1882 return NULL;
1883
1884 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
1885 return NULL;
1886
1887 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
1888 {
1889 if (!int_fits_type_p (then_clause, itype)
1890 || !int_fits_type_p (else_clause, itype))
1891 return NULL;
1892 }
1893
1894 def_stmt
1895 = gimple_build_assign_with_ops3 (COND_EXPR,
1896 vect_recog_temp_ssa_var (itype, NULL),
1897 unshare_expr (cond_expr),
1898 fold_convert (itype, then_clause),
1899 fold_convert (itype, else_clause));
1900 pattern_stmt
1901 = gimple_build_assign_with_ops (NOP_EXPR,
1902 vect_recog_temp_ssa_var (type, NULL),
1903 gimple_assign_lhs (def_stmt), NULL_TREE);
1904
083481d8 1905 new_pattern_def_seq (stmt_vinfo, def_stmt);
69d2aade
JJ
1906 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1907 set_vinfo_for_stmt (def_stmt, def_stmt_info);
1908 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
1909 *type_in = vecitype;
1910 *type_out = vectype;
1911
1912 return pattern_stmt;
1913}
1914
1915
71c92d17
JJ
1916/* Helper function of vect_recog_bool_pattern. Called recursively, return
1917 true if bool VAR can be optimized that way. */
1918
1919static bool
1920check_bool_pattern (tree var, loop_vec_info loop_vinfo)
1921{
1922 gimple def_stmt;
1923 enum vect_def_type dt;
1924 tree def, rhs1;
1925 enum tree_code rhs_code;
1926
1927 if (!vect_is_simple_use (var, loop_vinfo, NULL, &def_stmt, &def, &dt))
1928 return false;
1929
1930 if (dt != vect_internal_def)
1931 return false;
1932
1933 if (!is_gimple_assign (def_stmt))
1934 return false;
1935
1936 if (!has_single_use (def))
1937 return false;
1938
1939 rhs1 = gimple_assign_rhs1 (def_stmt);
1940 rhs_code = gimple_assign_rhs_code (def_stmt);
1941 switch (rhs_code)
1942 {
1943 case SSA_NAME:
1944 return check_bool_pattern (rhs1, loop_vinfo);
1945
1946 CASE_CONVERT:
1947 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
1948 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
1949 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
1950 return false;
1951 return check_bool_pattern (rhs1, loop_vinfo);
1952
1953 case BIT_NOT_EXPR:
1954 return check_bool_pattern (rhs1, loop_vinfo);
1955
1956 case BIT_AND_EXPR:
1957 case BIT_IOR_EXPR:
1958 case BIT_XOR_EXPR:
1959 if (!check_bool_pattern (rhs1, loop_vinfo))
1960 return false;
1961 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo);
1962
1963 default:
1964 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
1965 {
1966 tree vecitype, comp_vectype;
1967
1968 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
1969 if (comp_vectype == NULL_TREE)
1970 return false;
1971
1972 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
1973 {
1974 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
1975 tree itype
ab0ef706 1976 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
71c92d17
JJ
1977 vecitype = get_vectype_for_scalar_type (itype);
1978 if (vecitype == NULL_TREE)
1979 return false;
1980 }
1981 else
1982 vecitype = comp_vectype;
1983 return expand_vec_cond_expr_p (vecitype, comp_vectype);
1984 }
1985 return false;
1986 }
1987}
1988
1989
1990/* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
1991 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
363477c0 1992 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
71c92d17
JJ
1993
1994static tree
1995adjust_bool_pattern_cast (tree type, tree var)
1996{
1997 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
1998 gimple cast_stmt, pattern_stmt;
1999
363477c0 2000 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
71c92d17 2001 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
083481d8 2002 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
71c92d17
JJ
2003 cast_stmt
2004 = gimple_build_assign_with_ops (NOP_EXPR,
2005 vect_recog_temp_ssa_var (type, NULL),
2006 gimple_assign_lhs (pattern_stmt),
2007 NULL_TREE);
2008 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2009 return gimple_assign_lhs (cast_stmt);
2010}
2011
2012
2013/* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2014 recursively. VAR is an SSA_NAME that should be transformed from bool
2015 to a wider integer type, OUT_TYPE is the desired final integer type of
2016 the whole pattern, TRUEVAL should be NULL unless optimizing
2017 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2018 in the then_clause, STMTS is where statements with added pattern stmts
2019 should be pushed to. */
2020
2021static tree
2022adjust_bool_pattern (tree var, tree out_type, tree trueval,
2023 VEC (gimple, heap) **stmts)
2024{
2025 gimple stmt = SSA_NAME_DEF_STMT (var);
2026 enum tree_code rhs_code, def_rhs_code;
2027 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2028 location_t loc;
2029 gimple pattern_stmt, def_stmt;
2030
2031 rhs1 = gimple_assign_rhs1 (stmt);
2032 rhs2 = gimple_assign_rhs2 (stmt);
2033 rhs_code = gimple_assign_rhs_code (stmt);
2034 loc = gimple_location (stmt);
2035 switch (rhs_code)
2036 {
2037 case SSA_NAME:
2038 CASE_CONVERT:
2039 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2040 itype = TREE_TYPE (irhs1);
2041 pattern_stmt
2042 = gimple_build_assign_with_ops (SSA_NAME,
2043 vect_recog_temp_ssa_var (itype, NULL),
2044 irhs1, NULL_TREE);
2045 break;
2046
2047 case BIT_NOT_EXPR:
2048 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2049 itype = TREE_TYPE (irhs1);
2050 pattern_stmt
2051 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2052 vect_recog_temp_ssa_var (itype, NULL),
2053 irhs1, build_int_cst (itype, 1));
2054 break;
2055
2056 case BIT_AND_EXPR:
2057 /* Try to optimize x = y & (a < b ? 1 : 0); into
2058 x = (a < b ? y : 0);
2059
2060 E.g. for:
2061 bool a_b, b_b, c_b;
2062 TYPE d_T;
2063
2064 S1 a_b = x1 CMP1 y1;
2065 S2 b_b = x2 CMP2 y2;
2066 S3 c_b = a_b & b_b;
2067 S4 d_T = (TYPE) c_b;
2068
2069 we would normally emit:
2070
2071 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2072 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2073 S3' c_T = a_T & b_T;
2074 S4' d_T = c_T;
2075
2076 but we can save one stmt by using the
2077 result of one of the COND_EXPRs in the other COND_EXPR and leave
2078 BIT_AND_EXPR stmt out:
2079
2080 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2081 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2082 S4' f_T = c_T;
2083
2084 At least when VEC_COND_EXPR is implemented using masks
2085 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2086 computes the comparison masks and ands it, in one case with
2087 all ones vector, in the other case with a vector register.
2088 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2089 often more expensive. */
2090 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2091 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2092 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2093 {
2094 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2095 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2096 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2097 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2098 {
2099 gimple tstmt;
2100 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2101 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2102 tstmt = VEC_pop (gimple, *stmts);
2103 gcc_assert (tstmt == def_stmt);
2104 VEC_quick_push (gimple, *stmts, stmt);
2105 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2106 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
363477c0 2107 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
71c92d17
JJ
2108 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2109 return irhs2;
2110 }
2111 else
2112 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2113 goto and_ior_xor;
2114 }
2115 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2116 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2117 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2118 {
2119 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2120 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2121 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2122 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2123 {
2124 gimple tstmt;
2125 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2126 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2127 tstmt = VEC_pop (gimple, *stmts);
2128 gcc_assert (tstmt == def_stmt);
2129 VEC_quick_push (gimple, *stmts, stmt);
2130 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2131 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
363477c0 2132 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
71c92d17
JJ
2133 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2134 return irhs1;
2135 }
2136 else
2137 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2138 goto and_ior_xor;
2139 }
2140 /* FALLTHRU */
2141 case BIT_IOR_EXPR:
2142 case BIT_XOR_EXPR:
2143 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2144 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2145 and_ior_xor:
2146 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2147 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2148 {
2149 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2150 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2151 int out_prec = TYPE_PRECISION (out_type);
2152 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2153 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2154 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2155 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2156 else
2157 {
2158 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2159 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2160 }
2161 }
2162 itype = TREE_TYPE (irhs1);
2163 pattern_stmt
2164 = gimple_build_assign_with_ops (rhs_code,
2165 vect_recog_temp_ssa_var (itype, NULL),
2166 irhs1, irhs2);
2167 break;
2168
2169 default:
2170 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2171 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
ab0ef706 2172 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
71c92d17
JJ
2173 {
2174 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2175 itype
ab0ef706 2176 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
71c92d17
JJ
2177 }
2178 else
2179 itype = TREE_TYPE (rhs1);
2180 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2181 if (trueval == NULL_TREE)
2182 trueval = build_int_cst (itype, 1);
2183 else
2184 gcc_checking_assert (useless_type_conversion_p (itype,
2185 TREE_TYPE (trueval)));
2186 pattern_stmt
2187 = gimple_build_assign_with_ops3 (COND_EXPR,
2188 vect_recog_temp_ssa_var (itype, NULL),
2189 cond_expr, trueval,
2190 build_int_cst (itype, 0));
2191 break;
2192 }
2193
2194 VEC_safe_push (gimple, heap, *stmts, stmt);
2195 gimple_set_location (pattern_stmt, loc);
2196 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2197 return gimple_assign_lhs (pattern_stmt);
2198}
2199
2200
2201/* Function vect_recog_bool_pattern
2202
2203 Try to find pattern like following:
2204
2205 bool a_b, b_b, c_b, d_b, e_b;
2206 TYPE f_T;
2207 loop:
2208 S1 a_b = x1 CMP1 y1;
2209 S2 b_b = x2 CMP2 y2;
2210 S3 c_b = a_b & b_b;
2211 S4 d_b = x3 CMP3 y3;
2212 S5 e_b = c_b | d_b;
2213 S6 f_T = (TYPE) e_b;
2214
2215 where type 'TYPE' is an integral type.
2216
2217 Input:
2218
2219 * LAST_STMT: A stmt at the end from which the pattern
2220 search begins, i.e. cast of a bool to
2221 an integer type.
2222
2223 Output:
2224
2225 * TYPE_IN: The type of the input arguments to the pattern.
2226
2227 * TYPE_OUT: The type of the output of this pattern.
2228
2229 * Return value: A new stmt that will be used to replace the pattern.
2230
2231 Assuming size of TYPE is the same as size of all comparisons
2232 (otherwise some casts would be added where needed), the above
2233 sequence we create related pattern stmts:
2234 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2235 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2236 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2237 S5' e_T = c_T | d_T;
2238 S6' f_T = e_T;
2239
2240 Instead of the above S3' we could emit:
2241 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2242 S3' c_T = a_T | b_T;
2243 but the above is more efficient. */
2244
2245static gimple
2246vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
2247 tree *type_out)
2248{
2249 gimple last_stmt = VEC_pop (gimple, *stmts);
2250 enum tree_code rhs_code;
2251 tree var, lhs, rhs, vectype;
2252 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2253 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2254 gimple pattern_stmt;
2255
2256 if (!is_gimple_assign (last_stmt))
2257 return NULL;
2258
2259 var = gimple_assign_rhs1 (last_stmt);
2260 lhs = gimple_assign_lhs (last_stmt);
2261
2262 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2263 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2264 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2265 return NULL;
2266
2267 rhs_code = gimple_assign_rhs_code (last_stmt);
2268 if (CONVERT_EXPR_CODE_P (rhs_code))
2269 {
78048b1c
JJ
2270 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2271 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
71c92d17
JJ
2272 return NULL;
2273 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2274 if (vectype == NULL_TREE)
2275 return NULL;
2276
2277 if (!check_bool_pattern (var, loop_vinfo))
2278 return NULL;
2279
2280 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2281 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2282 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2283 pattern_stmt
2284 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2285 else
2286 pattern_stmt
2287 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2288 *type_out = vectype;
2289 *type_in = vectype;
2290 VEC_safe_push (gimple, heap, *stmts, last_stmt);
2291 return pattern_stmt;
2292 }
ab0ef706
JJ
2293 else if (rhs_code == SSA_NAME
2294 && STMT_VINFO_DATA_REF (stmt_vinfo))
2295 {
2296 stmt_vec_info pattern_stmt_info;
2297 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2298 gcc_assert (vectype != NULL_TREE);
78336739
JJ
2299 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2300 return NULL;
ab0ef706
JJ
2301 if (!check_bool_pattern (var, loop_vinfo))
2302 return NULL;
2303
2304 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2305 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2306 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2307 {
2308 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2309 gimple cast_stmt
2310 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
083481d8 2311 new_pattern_def_seq (stmt_vinfo, cast_stmt);
ab0ef706
JJ
2312 rhs = rhs2;
2313 }
2314 pattern_stmt
2315 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2316 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
2317 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2318 STMT_VINFO_DATA_REF (pattern_stmt_info)
2319 = STMT_VINFO_DATA_REF (stmt_vinfo);
2320 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2321 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2322 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2323 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2324 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2325 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2326 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2327 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
78048b1c 2328 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
ab0ef706
JJ
2329 *type_out = vectype;
2330 *type_in = vectype;
2331 VEC_safe_push (gimple, heap, *stmts, last_stmt);
2332 return pattern_stmt;
2333 }
71c92d17
JJ
2334 else
2335 return NULL;
2336}
2337
2338
1107f3ae
IR
2339/* Mark statements that are involved in a pattern. */
2340
2341static inline void
2342vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2343 tree pattern_vectype)
2344{
2345 stmt_vec_info pattern_stmt_info, def_stmt_info;
2346 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2347 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2348 gimple def_stmt;
2349
1107f3ae 2350 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
ab0ef706
JJ
2351 if (pattern_stmt_info == NULL)
2352 {
2353 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
2354 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2355 }
2356 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
1107f3ae
IR
2357
2358 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2359 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
ab0ef706 2360 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
1107f3ae
IR
2361 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2362 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2363 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
363477c0
JJ
2364 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2365 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2366 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
1107f3ae 2367 {
363477c0
JJ
2368 gimple_stmt_iterator si;
2369 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2370 !gsi_end_p (si); gsi_next (&si))
69d2aade 2371 {
363477c0
JJ
2372 def_stmt = gsi_stmt (si);
2373 def_stmt_info = vinfo_for_stmt (def_stmt);
2374 if (def_stmt_info == NULL)
2375 {
2376 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
2377 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2378 }
2379 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2380 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2381 STMT_VINFO_DEF_TYPE (def_stmt_info)
2382 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2383 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2384 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
69d2aade 2385 }
1107f3ae
IR
2386 }
2387}
2388
b8698a0f 2389/* Function vect_pattern_recog_1
20f06221
DN
2390
2391 Input:
2392 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
2393 computation pattern.
2394 STMT: A stmt from which the pattern search should start.
2395
2396 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
b8698a0f
L
2397 expression that computes the same functionality and can be used to
2398 replace the sequence of stmts that are involved in the pattern.
20f06221
DN
2399
2400 Output:
b8698a0f
L
2401 This function checks if the expression returned by PATTERN_RECOG_FUNC is
2402 supported in vector form by the target. We use 'TYPE_IN' to obtain the
2403 relevant vector type. If 'TYPE_IN' is already a vector type, then this
20f06221
DN
2404 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
2405 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
2406 to the available target pattern.
2407
b8698a0f 2408 This function also does some bookkeeping, as explained in the documentation
20f06221
DN
2409 for vect_recog_pattern. */
2410
2411static void
92aea285
JJ
2412vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
2413 gimple_stmt_iterator si,
2414 VEC (gimple, heap) **stmts_to_replace)
20f06221 2415{
726a989a 2416 gimple stmt = gsi_stmt (si), pattern_stmt;
383d9c83 2417 stmt_vec_info stmt_info;
383d9c83 2418 loop_vec_info loop_vinfo;
20f06221
DN
2419 tree pattern_vectype;
2420 tree type_in, type_out;
20f06221 2421 enum tree_code code;
b5aeb3bb
IR
2422 int i;
2423 gimple next;
20f06221 2424
d1fc143d
JJ
2425 VEC_truncate (gimple, *stmts_to_replace, 0);
2426 VEC_quick_push (gimple, *stmts_to_replace, stmt);
2427 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
726a989a 2428 if (!pattern_stmt)
b8698a0f
L
2429 return;
2430
d1fc143d 2431 stmt = VEC_last (gimple, *stmts_to_replace);
383d9c83
IR
2432 stmt_info = vinfo_for_stmt (stmt);
2433 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2434
b8698a0f
L
2435 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
2436 {
2437 /* No need to check target support (already checked by the pattern
2438 recognition function). */
b690cc0f 2439 pattern_vectype = type_out ? type_out : type_in;
20f06221
DN
2440 }
2441 else
2442 {
32e8bb8e 2443 enum machine_mode vec_mode;
20f06221
DN
2444 enum insn_code icode;
2445 optab optab;
2446
2447 /* Check target support */
b690cc0f
RG
2448 type_in = get_vectype_for_scalar_type (type_in);
2449 if (!type_in)
2450 return;
2451 if (type_out)
2452 type_out = get_vectype_for_scalar_type (type_out);
2453 else
2454 type_out = type_in;
15bbc165
AO
2455 if (!type_out)
2456 return;
b690cc0f 2457 pattern_vectype = type_out;
03d3e953 2458
726a989a
RB
2459 if (is_gimple_assign (pattern_stmt))
2460 code = gimple_assign_rhs_code (pattern_stmt);
2461 else
2462 {
2463 gcc_assert (is_gimple_call (pattern_stmt));
2464 code = CALL_EXPR;
2465 }
2466
b690cc0f
RG
2467 optab = optab_for_tree_code (code, type_in, optab_default);
2468 vec_mode = TYPE_MODE (type_in);
20f06221 2469 if (!optab
947131ba 2470 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
b690cc0f 2471 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
20f06221
DN
2472 return;
2473 }
2474
2475 /* Found a vectorizable pattern. */
2476 if (vect_print_dump_info (REPORT_DETAILS))
2477 {
b8698a0f 2478 fprintf (vect_dump, "pattern recognized: ");
726a989a 2479 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
20f06221 2480 }
b8698a0f 2481
726a989a 2482 /* Mark the stmts that are involved in the pattern. */
1107f3ae 2483 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
20f06221 2484
b5aeb3bb
IR
2485 /* Patterns cannot be vectorized using SLP, because they change the order of
2486 computation. */
ac47786e 2487 FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
b5aeb3bb
IR
2488 if (next == stmt)
2489 VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
51312233 2490
1107f3ae
IR
2491 /* It is possible that additional pattern stmts are created and inserted in
2492 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
2493 relevant statements. */
d1fc143d
JJ
2494 for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
2495 && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
51312233
IR
2496 i++)
2497 {
2498 stmt_info = vinfo_for_stmt (stmt);
2499 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2500 if (vect_print_dump_info (REPORT_DETAILS))
2501 {
2502 fprintf (vect_dump, "additional pattern stmt: ");
2503 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2504 }
2505
1107f3ae 2506 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
51312233 2507 }
20f06221
DN
2508}
2509
2510
2511/* Function vect_pattern_recog
2512
2513 Input:
2514 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
2515 computation idioms.
2516
9d5e7640
IR
2517 Output - for each computation idiom that is detected we create a new stmt
2518 that provides the same functionality and that can be vectorized. We
20f06221
DN
2519 also record some information in the struct_stmt_info of the relevant
2520 stmts, as explained below:
2521
2522 At the entry to this function we have the following stmts, with the
2523 following initial value in the STMT_VINFO fields:
2524
2525 stmt in_pattern_p related_stmt vec_stmt
2526 S1: a_i = .... - - -
2527 S2: a_2 = ..use(a_i).. - - -
2528 S3: a_1 = ..use(a_2).. - - -
2529 S4: a_0 = ..use(a_1).. - - -
2530 S5: ... = ..use(a_0).. - - -
2531
2532 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
9d5e7640
IR
2533 represented by a single stmt. We then:
2534 - create a new stmt S6 equivalent to the pattern (the stmt is not
2535 inserted into the code)
20f06221
DN
2536 - fill in the STMT_VINFO fields as follows:
2537
2538 in_pattern_p related_stmt vec_stmt
b8698a0f 2539 S1: a_i = .... - - -
20f06221
DN
2540 S2: a_2 = ..use(a_i).. - - -
2541 S3: a_1 = ..use(a_2).. - - -
20f06221 2542 S4: a_0 = ..use(a_1).. true S6 -
9d5e7640 2543 '---> S6: a_new = .... - S4 -
20f06221
DN
2544 S5: ... = ..use(a_0).. - - -
2545
2546 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
9d5e7640 2547 to each other through the RELATED_STMT field).
20f06221
DN
2548
2549 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
2550 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
2551 remain irrelevant unless used by stmts other than S4.
2552
2553 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
9d5e7640 2554 (because they are marked as irrelevant). It will vectorize S6, and record
83197f37
IR
2555 a pointer to the new vector stmt VS6 from S6 (as usual).
2556 S4 will be skipped, and S5 will be vectorized as usual:
20f06221
DN
2557
2558 in_pattern_p related_stmt vec_stmt
2559 S1: a_i = .... - - -
2560 S2: a_2 = ..use(a_i).. - - -
2561 S3: a_1 = ..use(a_2).. - - -
2562 > VS6: va_new = .... - - -
20f06221 2563 S4: a_0 = ..use(a_1).. true S6 VS6
9d5e7640 2564 '---> S6: a_new = .... - S4 VS6
20f06221
DN
2565 > VS5: ... = ..vuse(va_new).. - - -
2566 S5: ... = ..use(a_0).. - - -
2567
9d5e7640 2568 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
20f06221
DN
2569 elsewhere), and we'll end up with:
2570
b8698a0f 2571 VS6: va_new = ....
83197f37
IR
2572 VS5: ... = ..vuse(va_new)..
2573
2574 In case of more than one pattern statements, e.g., widen-mult with
2575 intermediate type:
2576
2577 S1 a_t = ;
2578 S2 a_T = (TYPE) a_t;
2579 '--> S3: a_it = (interm_type) a_t;
2580 S4 prod_T = a_T * CONST;
2581 '--> S5: prod_T' = a_it w* CONST;
2582
2583 there may be other users of a_T outside the pattern. In that case S2 will
2584 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
2585 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
2586 be recorded in S3. */
20f06221
DN
2587
2588void
2589vect_pattern_recog (loop_vec_info loop_vinfo)
2590{
2591 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2592 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2593 unsigned int nbbs = loop->num_nodes;
726a989a 2594 gimple_stmt_iterator si;
20f06221 2595 unsigned int i, j;
92aea285 2596 vect_recog_func_ptr vect_recog_func;
d1fc143d 2597 VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
20f06221
DN
2598
2599 if (vect_print_dump_info (REPORT_DETAILS))
2600 fprintf (vect_dump, "=== vect_pattern_recog ===");
2601
2602 /* Scan through the loop stmts, applying the pattern recognition
2603 functions starting at each stmt visited: */
2604 for (i = 0; i < nbbs; i++)
2605 {
2606 basic_block bb = bbs[i];
726a989a 2607 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
20f06221 2608 {
20f06221
DN
2609 /* Scan over all generic vect_recog_xxx_pattern functions. */
2610 for (j = 0; j < NUM_PATTERNS; j++)
2611 {
92aea285
JJ
2612 vect_recog_func = vect_vect_recog_func_ptrs[j];
2613 vect_pattern_recog_1 (vect_recog_func, si,
d1fc143d 2614 &stmts_to_replace);
20f06221
DN
2615 }
2616 }
2617 }
d1fc143d
JJ
2618
2619 VEC_free (gimple, heap, stmts_to_replace);
20f06221 2620}