1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "ggc.h"
26 #include "tree.h"
27 #include "stor-layout.h"
28 #include "target.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "gimple.h"
32 #include "gimplify.h"
33 #include "gimple-iterator.h"
34 #include "gimple-ssa.h"
35 #include "tree-phinodes.h"
36 #include "ssa-iterators.h"
37 #include "stringpool.h"
38 #include "tree-ssanames.h"
39 #include "cfgloop.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "params.h"
43 #include "tree-data-ref.h"
44 #include "tree-vectorizer.h"
45 #include "recog.h" /* FIXME: for insn_data */
46 #include "diagnostic-core.h"
47 #include "dumpfile.h"
48
49 /* Pattern recognition functions */
50 static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
51 tree *);
52 static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
53 tree *);
54 static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
55 tree *);
56 static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
57 static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
58 tree *);
59 static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
60 tree *, tree *);
61 static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
62 static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
63 tree *, tree *);
64 static gimple vect_recog_divmod_pattern (vec<gimple> *,
65 tree *, tree *);
66 static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
67 tree *, tree *);
68 static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
69 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
70 vect_recog_widen_mult_pattern,
71 vect_recog_widen_sum_pattern,
72 vect_recog_dot_prod_pattern,
73 vect_recog_pow_pattern,
74 vect_recog_widen_shift_pattern,
75 vect_recog_over_widening_pattern,
76 vect_recog_rotate_pattern,
77 vect_recog_vector_vector_shift_pattern,
78 vect_recog_divmod_pattern,
79 vect_recog_mixed_size_cond_pattern,
80 vect_recog_bool_pattern};
81
82 static inline void
83 append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
84 {
85 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
86 stmt);
87 }
88
89 static inline void
90 new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
91 {
92 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
93 append_pattern_def_seq (stmt_info, stmt);
94 }
95
96 /* Check whether STMT2 is in the same loop or basic block as STMT1.
97 Which of the two applies depends on whether we're currently doing
98 loop-based or basic-block-based vectorization, as determined by
99 the vinfo_for_stmt for STMT1 (which must be defined).
100
101 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
102 to be defined as well. */
103
104 static bool
105 vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
106 {
107 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
108 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
109 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
110
111 if (!gimple_bb (stmt2))
112 return false;
113
114 if (loop_vinfo)
115 {
116 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
117 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
118 return false;
119 }
120 else
121 {
122 if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
123 || gimple_code (stmt2) == GIMPLE_PHI)
124 return false;
125 }
126
127 gcc_assert (vinfo_for_stmt (stmt2));
128 return true;
129 }
130
131 /* If the LHS of DEF_STMT has a single use, and that statement is
132 in the same loop or basic block, return it. */
133
134 static gimple
135 vect_single_imm_use (gimple def_stmt)
136 {
137 tree lhs = gimple_assign_lhs (def_stmt);
138 use_operand_p use_p;
139 gimple use_stmt;
140
141 if (!single_imm_use (lhs, &use_p, &use_stmt))
142 return NULL;
143
144 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
145 return NULL;
146
147 return use_stmt;
148 }
149
150 /* Check whether NAME, an ssa-name used in USE_STMT,
151 is a result of a type promotion or demotion, such that:
152 DEF_STMT: NAME = NOP (name0)
153 where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
154 If CHECK_SIGN is TRUE, check that either both types are signed or both are
155 unsigned. */
156
157 static bool
158 type_conversion_p (tree name, gimple use_stmt, bool check_sign,
159 tree *orig_type, gimple *def_stmt, bool *promotion)
160 {
161 tree dummy;
162 gimple dummy_gimple;
163 loop_vec_info loop_vinfo;
164 stmt_vec_info stmt_vinfo;
165 tree type = TREE_TYPE (name);
166 tree oprnd0;
167 enum vect_def_type dt;
168 tree def;
169 bb_vec_info bb_vinfo;
170
171 stmt_vinfo = vinfo_for_stmt (use_stmt);
172 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
173 bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
174 if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
175 &def, &dt))
176 return false;
177
178 if (dt != vect_internal_def
179 && dt != vect_external_def && dt != vect_constant_def)
180 return false;
181
182 if (!*def_stmt)
183 return false;
184
185 if (!is_gimple_assign (*def_stmt))
186 return false;
187
188 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
189 return false;
190
191 oprnd0 = gimple_assign_rhs1 (*def_stmt);
192
193 *orig_type = TREE_TYPE (oprnd0);
194 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
195 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
196 return false;
197
198 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
199 *promotion = true;
200 else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
201 *promotion = false;
202 else
203 return false;
204
205 if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
206 bb_vinfo, &dummy_gimple, &dummy, &dt))
207 return false;
208
209 return true;
210 }
211
212 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
213 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
214
215 static tree
216 vect_recog_temp_ssa_var (tree type, gimple stmt)
217 {
218 return make_temp_ssa_name (type, stmt, "patt");
219 }
220
221 /* Function vect_recog_dot_prod_pattern
222
223 Try to find the following pattern:
224
225 type x_t, y_t;
226 TYPE1 prod;
227 TYPE2 sum = init;
228 loop:
229 sum_0 = phi <init, sum_1>
230 S1 x_t = ...
231 S2 y_t = ...
232 S3 x_T = (TYPE1) x_t;
233 S4 y_T = (TYPE1) y_t;
234 S5 prod = x_T * y_T;
235 [S6 prod = (TYPE2) prod; #optional]
236 S7 sum_1 = prod + sum_0;
237
238 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
239    same size as 'TYPE1' or bigger. This is a special case of a reduction
240 computation.
241
242 Input:
243
244 * STMTS: Contains a stmt from which the pattern search begins. In the
245 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
246 will be detected.
247
248 Output:
249
250 * TYPE_IN: The type of the input arguments to the pattern.
251
252 * TYPE_OUT: The type of the output of this pattern.
253
254 * Return value: A new stmt that will be used to replace the sequence of
255 stmts that constitute the pattern. In this case it will be:
256 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
257
258 Note: The dot-prod idiom is a widening reduction pattern that is
259 vectorized without preserving all the intermediate results. It
260 produces only N/2 (widened) results (by summing up pairs of
261 intermediate results) rather than all N results. Therefore, we
262 cannot allow this pattern when we want to get all the results and in
263 the correct order (as is the case when this computation is in an
264    inner-loop nested in an outer-loop that is being vectorized). */
265
266 static gimple
267 vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
268 tree *type_out)
269 {
270 gimple stmt, last_stmt = (*stmts)[0];
271 tree oprnd0, oprnd1;
272 tree oprnd00, oprnd01;
273 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
274 tree type, half_type;
275 gimple pattern_stmt;
276 tree prod_type;
277 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
278 struct loop *loop;
279 tree var;
280 bool promotion;
281
282 if (!loop_info)
283 return NULL;
284
285 loop = LOOP_VINFO_LOOP (loop_info);
286
287 if (!is_gimple_assign (last_stmt))
288 return NULL;
289
290 type = gimple_expr_type (last_stmt);
291
292 /* Look for the following pattern
293 DX = (TYPE1) X;
294 DY = (TYPE1) Y;
295 DPROD = DX * DY;
296 DDPROD = (TYPE2) DPROD;
297 sum_1 = DDPROD + sum_0;
298 In which
299 - DX is double the size of X
300 - DY is double the size of Y
301 - DX, DY, DPROD all have the same type
302    - sum is the same size as DPROD or bigger
303 - sum has been recognized as a reduction variable.
304
305 This is equivalent to:
306 DPROD = X w* Y; #widen mult
307 sum_1 = DPROD w+ sum_0; #widen summation
308 or
309 DPROD = X w* Y; #widen mult
310 sum_1 = DPROD + sum_0; #summation
311 */
312
313 /* Starting from LAST_STMT, follow the defs of its uses in search
314 of the above pattern. */
315
316 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
317 return NULL;
318
319 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
320 {
321 /* Has been detected as widening-summation? */
322
323 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
324 type = gimple_expr_type (stmt);
325 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
326 return NULL;
327 oprnd0 = gimple_assign_rhs1 (stmt);
328 oprnd1 = gimple_assign_rhs2 (stmt);
329 half_type = TREE_TYPE (oprnd0);
330 }
331 else
332 {
333 gimple def_stmt;
334
335 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
336 return NULL;
337 oprnd0 = gimple_assign_rhs1 (last_stmt);
338 oprnd1 = gimple_assign_rhs2 (last_stmt);
339 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
340 || !types_compatible_p (TREE_TYPE (oprnd1), type))
341 return NULL;
342 stmt = last_stmt;
343
344 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
345 &promotion)
346 && promotion)
347 {
348 stmt = def_stmt;
349 oprnd0 = gimple_assign_rhs1 (stmt);
350 }
351 else
352 half_type = type;
353 }
354
355 /* So far so good. Since last_stmt was detected as a (summation) reduction,
356 we know that oprnd1 is the reduction variable (defined by a loop-header
357 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
358 Left to check that oprnd0 is defined by a (widen_)mult_expr */
359 if (TREE_CODE (oprnd0) != SSA_NAME)
360 return NULL;
361
362 prod_type = half_type;
363 stmt = SSA_NAME_DEF_STMT (oprnd0);
364
365 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
366 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
367 return NULL;
368
369   /* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
370 inside the loop (in case we are analyzing an outer-loop). */
371 if (!is_gimple_assign (stmt))
372 return NULL;
373 stmt_vinfo = vinfo_for_stmt (stmt);
374 gcc_assert (stmt_vinfo);
375 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
376 return NULL;
377 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
378 return NULL;
379 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
380 {
381 /* Has been detected as a widening multiplication? */
382
383 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
384 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
385 return NULL;
386 stmt_vinfo = vinfo_for_stmt (stmt);
387 gcc_assert (stmt_vinfo);
388 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
389 oprnd00 = gimple_assign_rhs1 (stmt);
390 oprnd01 = gimple_assign_rhs2 (stmt);
391 }
392 else
393 {
394 tree half_type0, half_type1;
395 gimple def_stmt;
396 tree oprnd0, oprnd1;
397
398 oprnd0 = gimple_assign_rhs1 (stmt);
399 oprnd1 = gimple_assign_rhs2 (stmt);
400 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
401 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
402 return NULL;
403 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
404 &promotion)
405 || !promotion)
406 return NULL;
407 oprnd00 = gimple_assign_rhs1 (def_stmt);
408 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
409 &promotion)
410 || !promotion)
411 return NULL;
412 oprnd01 = gimple_assign_rhs1 (def_stmt);
413 if (!types_compatible_p (half_type0, half_type1))
414 return NULL;
415 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
416 return NULL;
417 }
418
419 half_type = TREE_TYPE (oprnd00);
420 *type_in = half_type;
421 *type_out = type;
422
423 /* Pattern detected. Create a stmt to be used to replace the pattern: */
424 var = vect_recog_temp_ssa_var (type, NULL);
425 pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
426 oprnd00, oprnd01, oprnd1);
427
428 if (dump_enabled_p ())
429 {
430 dump_printf_loc (MSG_NOTE, vect_location,
431 "vect_recog_dot_prod_pattern: detected: ");
432 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
433 dump_printf (MSG_NOTE, "\n");
434 }
435
436 /* We don't allow changing the order of the computation in the inner-loop
437 when doing outer-loop vectorization. */
438 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
439
440 return pattern_stmt;
441 }
442
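/* [Editorial sketch -- not part of the original source.]  A minimal C
   loop of the shape the recognizer above matches.  Assuming a target
   that implements DOT_PROD_EXPR (e.g. via a widening dot-product
   instruction), statements S3-S7 collapse into a single DOT_PROD_EXPR
   per vector iteration:

     int
     sdot (short *x, short *y, int n)
     {
       int sum = 0;              // TYPE2 accumulator (sum_0/sum_1)
       for (int i = 0; i < n; i++)
         sum += x[i] * y[i];     // S3-S5: widen both operands, multiply, add
       return sum;
     }

   Here 'type' is short, TYPE1 is int (exactly double the width) and
   TYPE2 is int as well.  */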
443
444 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
445 and LSHIFT_EXPR.
446
447 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
448    we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.
449
450 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
451 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
452    that satisfies the above restrictions, we can perform a widening operation
453 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
454 with a_it = (interm_type) a_t; */
455
456 static bool
457 vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
458 tree const_oprnd, tree *oprnd,
459 vec<gimple> *stmts, tree type,
460 tree *half_type, gimple def_stmt)
461 {
462 tree new_type, new_oprnd;
463 gimple new_stmt;
464
465 if (code != MULT_EXPR && code != LSHIFT_EXPR)
466 return false;
467
468 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
469 || (code == LSHIFT_EXPR
470 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
471 != 1))
472 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
473 {
474 /* CONST_OPRND is a constant of HALF_TYPE. */
475 *oprnd = gimple_assign_rhs1 (def_stmt);
476 return true;
477 }
478
479 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
480 return false;
481
482 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
483 return false;
484
485 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
486 a type 2 times bigger than HALF_TYPE. */
487 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
488 TYPE_UNSIGNED (type));
489 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
490 || (code == LSHIFT_EXPR
491 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
492 return false;
493
494 /* Use NEW_TYPE for widening operation. */
495 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
496 {
497 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
498 /* Check if the already created pattern stmt is what we need. */
499 if (!is_gimple_assign (new_stmt)
500 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
501 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
502 return false;
503
504 stmts->safe_push (def_stmt);
505 *oprnd = gimple_assign_lhs (new_stmt);
506 }
507 else
508 {
509 /* Create a_T = (NEW_TYPE) a_t; */
510 *oprnd = gimple_assign_rhs1 (def_stmt);
511 new_oprnd = make_ssa_name (new_type, NULL);
512 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
513 NULL_TREE);
514 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
515 stmts->safe_push (def_stmt);
516 *oprnd = new_oprnd;
517 }
518
519 *half_type = new_type;
520 return true;
521 }
522
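/* [Editorial sketch -- not part of the original source.]  The
   intermediate-type path above, in C terms.  With an 8-bit input
   widened to a 32-bit result, a constant that only fits a 16-bit type
   lets the widening be performed in two steps:

     void
     scale (unsigned char *a, int *c, int n)
     {
       for (int i = 0; i < n; i++)
         c[i] = a[i] * 300;      // 300 does not fit unsigned char, but
                                 // fits the 16-bit intermediate type, so
                                 // a_it = (interm_type) a_t is created
                                 // and the multiply widens a_it w* 300
     }
*/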
523
524 /* Function vect_recog_widen_mult_pattern
525
526 Try to find the following pattern:
527
528 type a_t, b_t;
529 TYPE a_T, b_T, prod_T;
530
531 S1 a_t = ;
532 S2 b_t = ;
533 S3 a_T = (TYPE) a_t;
534 S4 b_T = (TYPE) b_t;
535 S5 prod_T = a_T * b_T;
536
537 where type 'TYPE' is at least double the size of type 'type'.
538
539 Also detect unsigned cases:
540
541 unsigned type a_t, b_t;
542 unsigned TYPE u_prod_T;
543 TYPE a_T, b_T, prod_T;
544
545 S1 a_t = ;
546 S2 b_t = ;
547 S3 a_T = (TYPE) a_t;
548 S4 b_T = (TYPE) b_t;
549 S5 prod_T = a_T * b_T;
550 S6 u_prod_T = (unsigned TYPE) prod_T;
551
552 and multiplication by constants:
553
554 type a_t;
555 TYPE a_T, prod_T;
556
557 S1 a_t = ;
558 S3 a_T = (TYPE) a_t;
559 S5 prod_T = a_T * CONST;
560
561 A special case of multiplication by constants is when 'TYPE' is 4 times
562 bigger than 'type', but CONST fits an intermediate type 2 times smaller
563 than 'TYPE'. In that case we create an additional pattern stmt for S3
564 to create a variable of the intermediate type, and perform widen-mult
565 on the intermediate type as well:
566
567 type a_t;
568 interm_type a_it;
569 TYPE a_T, prod_T, prod_T';
570
571 S1 a_t = ;
572 S3 a_T = (TYPE) a_t;
573 '--> a_it = (interm_type) a_t;
574 S5 prod_T = a_T * CONST;
575 '--> prod_T' = a_it w* CONST;
576
577 Input/Output:
578
579 * STMTS: Contains a stmt from which the pattern search begins. In the
580 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
581 is detected. In case of unsigned widen-mult, the original stmt (S5) is
582 replaced with S6 in STMTS. In case of multiplication by a constant
583 of an intermediate type (the last case above), STMTS also contains S3
584 (inserted before S5).
585
586 Output:
587
588 * TYPE_IN: The type of the input arguments to the pattern.
589
590 * TYPE_OUT: The type of the output of this pattern.
591
592 * Return value: A new stmt that will be used to replace the sequence of
593 stmts that constitute the pattern. In this case it will be:
594 WIDEN_MULT <a_t, b_t>
595 */
596
597 static gimple
598 vect_recog_widen_mult_pattern (vec<gimple> *stmts,
599 tree *type_in, tree *type_out)
600 {
601 gimple last_stmt = stmts->pop ();
602 gimple def_stmt0, def_stmt1;
603 tree oprnd0, oprnd1;
604 tree type, half_type0, half_type1;
605 gimple pattern_stmt;
606 tree vectype, vectype_out = NULL_TREE;
607 tree var;
608 enum tree_code dummy_code;
609 int dummy_int;
610 vec<tree> dummy_vec;
611 bool op1_ok;
612 bool promotion;
613
614 if (!is_gimple_assign (last_stmt))
615 return NULL;
616
617 type = gimple_expr_type (last_stmt);
618
619 /* Starting from LAST_STMT, follow the defs of its uses in search
620 of the above pattern. */
621
622 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
623 return NULL;
624
625 oprnd0 = gimple_assign_rhs1 (last_stmt);
626 oprnd1 = gimple_assign_rhs2 (last_stmt);
627 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
628 || !types_compatible_p (TREE_TYPE (oprnd1), type))
629 return NULL;
630
631 /* Check argument 0. */
632 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
633 &promotion)
634 || !promotion)
635 return NULL;
636 /* Check argument 1. */
637 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
638 &def_stmt1, &promotion);
639
640 if (op1_ok && promotion)
641 {
642 oprnd0 = gimple_assign_rhs1 (def_stmt0);
643 oprnd1 = gimple_assign_rhs1 (def_stmt1);
644 }
645 else
646 {
647 if (TREE_CODE (oprnd1) == INTEGER_CST
648 && TREE_CODE (half_type0) == INTEGER_TYPE
649 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
650 &oprnd0, stmts, type,
651 &half_type0, def_stmt0))
652 {
653 half_type1 = half_type0;
654 oprnd1 = fold_convert (half_type1, oprnd1);
655 }
656 else
657 return NULL;
658 }
659
660 /* Handle unsigned case. Look for
661 S6 u_prod_T = (unsigned TYPE) prod_T;
662 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
663 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
664 {
665 gimple use_stmt;
666 tree use_lhs;
667 tree use_type;
668
669 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
670 return NULL;
671
672 use_stmt = vect_single_imm_use (last_stmt);
673 if (!use_stmt || !is_gimple_assign (use_stmt)
674 || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
675 return NULL;
676
677 use_lhs = gimple_assign_lhs (use_stmt);
678 use_type = TREE_TYPE (use_lhs);
679 if (!INTEGRAL_TYPE_P (use_type)
680 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
681 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
682 return NULL;
683
684 type = use_type;
685 last_stmt = use_stmt;
686 }
687
688 if (!types_compatible_p (half_type0, half_type1))
689 return NULL;
690
691 /* Pattern detected. */
692 if (dump_enabled_p ())
693 dump_printf_loc (MSG_NOTE, vect_location,
694 "vect_recog_widen_mult_pattern: detected:\n");
695
696 /* Check target support */
697 vectype = get_vectype_for_scalar_type (half_type0);
698 vectype_out = get_vectype_for_scalar_type (type);
699 if (!vectype
700 || !vectype_out
701 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
702 vectype_out, vectype,
703 &dummy_code, &dummy_code,
704 &dummy_int, &dummy_vec))
705 return NULL;
706
707 *type_in = vectype;
708 *type_out = vectype_out;
709
710 /* Pattern supported. Create a stmt to be used to replace the pattern: */
711 var = vect_recog_temp_ssa_var (type, NULL);
712 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
713 oprnd1);
714
715 if (dump_enabled_p ())
716 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
717
718 stmts->safe_push (last_stmt);
719 return pattern_stmt;
720 }
721
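/* [Editorial sketch -- not part of the original source.]  The basic
   widen-mult shape in C.  Integer promotion widens both operands before
   the multiply, giving exactly S3-S5 above:

     void
     wmul (short *a, short *b, int *c, int n)
     {
       for (int i = 0; i < n; i++)
         c[i] = a[i] * b[i];     // (int) a[i] * (int) b[i]
                                 // -> WIDEN_MULT <a_t, b_t>
     }
*/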
722
723 /* Function vect_recog_pow_pattern
724
725 Try to find the following pattern:
726
727 x = POW (y, N);
728
729 with POW being one of pow, powf, powi, powif and N being
730 either 2 or 0.5.
731
732 Input:
733
734 * LAST_STMT: A stmt from which the pattern search begins.
735
736 Output:
737
738 * TYPE_IN: The type of the input arguments to the pattern.
739
740 * TYPE_OUT: The type of the output of this pattern.
741
742 * Return value: A new stmt that will be used to replace the sequence of
743 stmts that constitute the pattern. In this case it will be:
744 x = x * x
745 or
746 x = sqrt (x)
747 */
748
749 static gimple
750 vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
751 tree *type_out)
752 {
753 gimple last_stmt = (*stmts)[0];
754 tree fn, base, exp = NULL;
755 gimple stmt;
756 tree var;
757
758 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
759 return NULL;
760
761 fn = gimple_call_fndecl (last_stmt);
762 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
763 return NULL;
764
765 switch (DECL_FUNCTION_CODE (fn))
766 {
767 case BUILT_IN_POWIF:
768 case BUILT_IN_POWI:
769 case BUILT_IN_POWF:
770 case BUILT_IN_POW:
771 base = gimple_call_arg (last_stmt, 0);
772 exp = gimple_call_arg (last_stmt, 1);
773 if (TREE_CODE (exp) != REAL_CST
774 && TREE_CODE (exp) != INTEGER_CST)
775 return NULL;
776 break;
777
778 default:
779 return NULL;
780 }
781
782 /* We now have a pow or powi builtin function call with a constant
783 exponent. */
784
785 *type_out = NULL_TREE;
786
787 /* Catch squaring. */
788 if ((tree_fits_shwi_p (exp)
789 && tree_to_shwi (exp) == 2)
790 || (TREE_CODE (exp) == REAL_CST
791 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
792 {
793 *type_in = TREE_TYPE (base);
794
795 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
796 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
797 return stmt;
798 }
799
800 /* Catch square root. */
801 if (TREE_CODE (exp) == REAL_CST
802 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
803 {
804 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
805 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
806 if (*type_in)
807 {
808 gimple stmt = gimple_build_call (newfn, 1, base);
809 if (vectorizable_function (stmt, *type_in, *type_in)
810 != NULL_TREE)
811 {
812 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
813 gimple_call_set_lhs (stmt, var);
814 return stmt;
815 }
816 }
817 }
818
819 return NULL;
820 }
821
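/* [Editorial sketch -- not part of the original source.]  Calls of the
   shape the recognizer above rewrites, leaving aside the usual caveats
   about pow vs. sqrt corner cases for negative zero and infinities:

     #include <math.h>

     double square (double x) { return pow (x, 2.0); }   // -> x * x
     double root (double x)   { return pow (x, 0.5); }   // -> sqrt (x)
*/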
822
823 /* Function vect_recog_widen_sum_pattern
824
825 Try to find the following pattern:
826
827 type x_t;
828 TYPE x_T, sum = init;
829 loop:
830 sum_0 = phi <init, sum_1>
831 S1 x_t = *p;
832 S2 x_T = (TYPE) x_t;
833 S3 sum_1 = x_T + sum_0;
834
835    where type 'TYPE' is at least double the size of type 'type', i.e., we're
836 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
837 a special case of a reduction computation.
838
839 Input:
840
841 * LAST_STMT: A stmt from which the pattern search begins. In the example,
842 when this function is called with S3, the pattern {S2,S3} will be detected.
843
844 Output:
845
846 * TYPE_IN: The type of the input arguments to the pattern.
847
848 * TYPE_OUT: The type of the output of this pattern.
849
850 * Return value: A new stmt that will be used to replace the sequence of
851 stmts that constitute the pattern. In this case it will be:
852 WIDEN_SUM <x_t, sum_0>
853
854 Note: The widening-sum idiom is a widening reduction pattern that is
855 vectorized without preserving all the intermediate results. It
856 produces only N/2 (widened) results (by summing up pairs of
857 intermediate results) rather than all N results. Therefore, we
858 cannot allow this pattern when we want to get all the results and in
859 the correct order (as is the case when this computation is in an
860    inner-loop nested in an outer-loop that is being vectorized). */
861
862 static gimple
863 vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
864 tree *type_out)
865 {
866 gimple stmt, last_stmt = (*stmts)[0];
867 tree oprnd0, oprnd1;
868 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
869 tree type, half_type;
870 gimple pattern_stmt;
871 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
872 struct loop *loop;
873 tree var;
874 bool promotion;
875
876 if (!loop_info)
877 return NULL;
878
879 loop = LOOP_VINFO_LOOP (loop_info);
880
881 if (!is_gimple_assign (last_stmt))
882 return NULL;
883
884 type = gimple_expr_type (last_stmt);
885
886 /* Look for the following pattern
887 DX = (TYPE) X;
888 sum_1 = DX + sum_0;
889 In which DX is at least double the size of X, and sum_1 has been
890 recognized as a reduction variable.
891 */
892
893 /* Starting from LAST_STMT, follow the defs of its uses in search
894 of the above pattern. */
895
896 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
897 return NULL;
898
899 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
900 return NULL;
901
902 oprnd0 = gimple_assign_rhs1 (last_stmt);
903 oprnd1 = gimple_assign_rhs2 (last_stmt);
904 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
905 || !types_compatible_p (TREE_TYPE (oprnd1), type))
906 return NULL;
907
908 /* So far so good. Since last_stmt was detected as a (summation) reduction,
909 we know that oprnd1 is the reduction variable (defined by a loop-header
910 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
911 Left to check that oprnd0 is defined by a cast from type 'type' to type
912 'TYPE'. */
913
914 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
915 &promotion)
916 || !promotion)
917 return NULL;
918
919 oprnd0 = gimple_assign_rhs1 (stmt);
920 *type_in = half_type;
921 *type_out = type;
922
923 /* Pattern detected. Create a stmt to be used to replace the pattern: */
924 var = vect_recog_temp_ssa_var (type, NULL);
925 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
926 oprnd0, oprnd1);
927
928 if (dump_enabled_p ())
929 {
930 dump_printf_loc (MSG_NOTE, vect_location,
931 "vect_recog_widen_sum_pattern: detected: ");
932 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
933 dump_printf (MSG_NOTE, "\n");
934 }
935
936 /* We don't allow changing the order of the computation in the inner-loop
937 when doing outer-loop vectorization. */
938 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
939
940 return pattern_stmt;
941 }
942
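/* [Editorial sketch -- not part of the original source.]  The
   widening-summation shape in C, matching S1-S3 above:

     int
     wsum (short *x, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
         sum += x[i];            // S2+S3: (int) x[i] + sum_0
                                 // -> WIDEN_SUM <x_t, sum_0>
       return sum;
     }
*/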
943
944 /* Return TRUE if the operation in STMT can be performed on a smaller type.
945
946 Input:
947 STMT - a statement to check.
948 DEF - we support operations with two operands, one of which is constant.
949 The other operand can be defined by a demotion operation, or by a
950 previous statement in a sequence of over-promoted operations. In the
951          latter case DEF is used to replace that operand. (It is defined by a
952 pattern statement we created for the previous statement in the
953 sequence).
954
955 Input/output:
956 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
957 NULL, it's the type of DEF.
958 STMTS - additional pattern statements. If a pattern statement (type
959 conversion) is created in this function, its original statement is
960 added to STMTS.
961
962 Output:
963 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
964 operands to use in the new pattern statement for STMT (will be created
965 in vect_recog_over_widening_pattern ()).
966 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
967 statements for STMT: the first one is a type promotion and the second
968 one is the operation itself. We return the type promotion statement
969 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
970 the second pattern statement. */
971
972 static bool
973 vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
974 tree *op0, tree *op1, gimple *new_def_stmt,
975 vec<gimple> *stmts)
976 {
977 enum tree_code code;
978 tree const_oprnd, oprnd;
979 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
980 gimple def_stmt, new_stmt;
981 bool first = false;
982 bool promotion;
983
984 *op0 = NULL_TREE;
985 *op1 = NULL_TREE;
986 *new_def_stmt = NULL;
987
988 if (!is_gimple_assign (stmt))
989 return false;
990
991 code = gimple_assign_rhs_code (stmt);
992 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
993 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
994 return false;
995
996 oprnd = gimple_assign_rhs1 (stmt);
997 const_oprnd = gimple_assign_rhs2 (stmt);
998 type = gimple_expr_type (stmt);
999
1000 if (TREE_CODE (oprnd) != SSA_NAME
1001 || TREE_CODE (const_oprnd) != INTEGER_CST)
1002 return false;
1003
1004 /* If oprnd has other uses besides that in stmt we cannot mark it
1005 as being part of a pattern only. */
1006 if (!has_single_use (oprnd))
1007 return false;
1008
1009 /* If we are in the middle of a sequence, we use DEF from a previous
1010 statement. Otherwise, OPRND has to be a result of type promotion. */
1011 if (*new_type)
1012 {
1013 half_type = *new_type;
1014 oprnd = def;
1015 }
1016 else
1017 {
1018 first = true;
1019 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1020 &promotion)
1021 || !promotion
1022 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1023 return false;
1024 }
1025
1026 /* Can we perform the operation on a smaller type? */
1027 switch (code)
1028 {
1029 case BIT_IOR_EXPR:
1030 case BIT_XOR_EXPR:
1031 case BIT_AND_EXPR:
1032 if (!int_fits_type_p (const_oprnd, half_type))
1033 {
1034 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1035 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1036 return false;
1037
1038 interm_type = build_nonstandard_integer_type (
1039 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1040 if (!int_fits_type_p (const_oprnd, interm_type))
1041 return false;
1042 }
1043
1044 break;
1045
1046 case LSHIFT_EXPR:
1047 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1048 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1049 return false;
1050
1051 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1052 (e.g., if the original value was char, the shift amount is at most 8
1053 if we want to use short). */
1054 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1055 return false;
1056
1057 interm_type = build_nonstandard_integer_type (
1058 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1059
1060 if (!vect_supportable_shift (code, interm_type))
1061 return false;
1062
1063 break;
1064
1065 case RSHIFT_EXPR:
1066 if (vect_supportable_shift (code, half_type))
1067 break;
1068
1069 /* Try intermediate type - HALF_TYPE is not supported. */
1070 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1071 return false;
1072
1073 interm_type = build_nonstandard_integer_type (
1074 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1075
1076 if (!vect_supportable_shift (code, interm_type))
1077 return false;
1078
1079 break;
1080
1081 default:
1082 gcc_unreachable ();
1083 }
1084
1085 /* There are four possible cases:
1086 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1087 the first statement in the sequence)
1088 a. The original, HALF_TYPE, is not enough - we replace the promotion
1089 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1090 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1091 promotion.
1092 2. OPRND is defined by a pattern statement we created.
1093 a. Its type is not sufficient for the operation, we create a new stmt:
1094 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1095 this statement in NEW_DEF_STMT, and it is later put in
1096 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1097 b. OPRND is good to use in the new statement. */
1098 if (first)
1099 {
1100 if (interm_type)
1101 {
1102 /* Replace the original type conversion HALF_TYPE->TYPE with
1103 HALF_TYPE->INTERM_TYPE. */
1104 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1105 {
1106 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1107 /* Check if the already created pattern stmt is what we need. */
1108 if (!is_gimple_assign (new_stmt)
1109 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
1110 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1111 return false;
1112
1113 stmts->safe_push (def_stmt);
1114 oprnd = gimple_assign_lhs (new_stmt);
1115 }
1116 else
1117 {
1118 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1119 oprnd = gimple_assign_rhs1 (def_stmt);
1120 new_oprnd = make_ssa_name (interm_type, NULL);
1121 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1122 oprnd, NULL_TREE);
1123 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1124 stmts->safe_push (def_stmt);
1125 oprnd = new_oprnd;
1126 }
1127 }
1128 else
1129 {
1130 /* Retrieve the operand before the type promotion. */
1131 oprnd = gimple_assign_rhs1 (def_stmt);
1132 }
1133 }
1134 else
1135 {
1136 if (interm_type)
1137 {
1138 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1139 new_oprnd = make_ssa_name (interm_type, NULL);
1140 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1141 oprnd, NULL_TREE);
1142 oprnd = new_oprnd;
1143 *new_def_stmt = new_stmt;
1144 }
1145
1146 /* Otherwise, OPRND is already set. */
1147 }
1148
1149 if (interm_type)
1150 *new_type = interm_type;
1151 else
1152 *new_type = half_type;
1153
1154 *op0 = oprnd;
1155 *op1 = fold_convert (*new_type, const_oprnd);
1156
1157 return true;
1158 }
1159
1160
1161 /* Try to find a statement or a sequence of statements that can be performed
1162 on a smaller type:
1163
1164 type x_t;
1165 TYPE x_T, res0_T, res1_T;
1166 loop:
1167 S1 x_t = *p;
1168 S2 x_T = (TYPE) x_t;
1169 S3 res0_T = op (x_T, C0);
1170 S4 res1_T = op (res0_T, C1);
1171 S5 ... = () res1_T; - type demotion
1172
1173 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1174 constants.
1175    Check if S3 and S4 can be done on a smaller type than 'TYPE'; it can either
1176 be 'type' or some intermediate type. For now, we expect S5 to be a type
1177 demotion operation. We also check that S3 and S4 have only one use. */
1178
1179 static gimple
1180 vect_recog_over_widening_pattern (vec<gimple> *stmts,
1181 tree *type_in, tree *type_out)
1182 {
1183 gimple stmt = stmts->pop ();
1184 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1185 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1186 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1187 bool first;
1188 tree type = NULL;
1189
1190 first = true;
1191 while (1)
1192 {
1193 if (!vinfo_for_stmt (stmt)
1194 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1195 return NULL;
1196
1197 new_def_stmt = NULL;
1198 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1199 &op0, &op1, &new_def_stmt,
1200 stmts))
1201 {
1202 if (first)
1203 return NULL;
1204 else
1205 break;
1206 }
1207
1208 /* STMT can be performed on a smaller type. Check its uses. */
1209 use_stmt = vect_single_imm_use (stmt);
1210 if (!use_stmt || !is_gimple_assign (use_stmt))
1211 return NULL;
1212
1213 /* Create pattern statement for STMT. */
1214 vectype = get_vectype_for_scalar_type (new_type);
1215 if (!vectype)
1216 return NULL;
1217
1218 /* We want to collect all the statements for which we create pattern
1219          statements, except for the case when the last statement in the
1220          sequence doesn't have a corresponding pattern statement. In such
1221          a case we associate the last pattern statement with the last statement
1222 in the sequence. Therefore, we only add the original statement to
1223 the list if we know that it is not the last. */
1224 if (prev_stmt)
1225 stmts->safe_push (prev_stmt);
1226
1227 var = vect_recog_temp_ssa_var (new_type, NULL);
1228 pattern_stmt
1229 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
1230 op0, op1);
1231 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1232 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1233
1234 if (dump_enabled_p ())
1235 {
1236 dump_printf_loc (MSG_NOTE, vect_location,
1237 "created pattern stmt: ");
1238 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1239 dump_printf (MSG_NOTE, "\n");
1240 }
1241
1242 type = gimple_expr_type (stmt);
1243 prev_stmt = stmt;
1244 stmt = use_stmt;
1245
1246 first = false;
1247 }
1248
1249 /* We got a sequence. We expect it to end with a type demotion operation.
1250 Otherwise, we quit (for now). There are three possible cases: the
1251 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1252 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1253 NEW_TYPE differs (we create a new conversion statement). */
1254 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1255 {
1256 use_lhs = gimple_assign_lhs (use_stmt);
1257 use_type = TREE_TYPE (use_lhs);
1258        /* Support only type demotion or signedness change. */
1259 if (!INTEGRAL_TYPE_P (use_type)
1260 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1261 return NULL;
1262
1263 /* Check that NEW_TYPE is not bigger than the conversion result. */
1264 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1265 return NULL;
1266
1267 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1268 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1269 {
1270 /* Create NEW_TYPE->USE_TYPE conversion. */
1271 new_oprnd = make_ssa_name (use_type, NULL);
1272 pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1273 var, NULL_TREE);
1274 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1275
1276 *type_in = get_vectype_for_scalar_type (new_type);
1277 *type_out = get_vectype_for_scalar_type (use_type);
1278
1279 /* We created a pattern statement for the last statement in the
1280 sequence, so we don't need to associate it with the pattern
1281 statement created for PREV_STMT. Therefore, we add PREV_STMT
1282 to the list in order to mark it later in vect_pattern_recog_1. */
1283 if (prev_stmt)
1284 stmts->safe_push (prev_stmt);
1285 }
1286 else
1287 {
1288 if (prev_stmt)
1289 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1290 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1291
1292 *type_in = vectype;
1293 *type_out = NULL_TREE;
1294 }
1295
1296 stmts->safe_push (use_stmt);
1297 }
1298 else
1299 /* TODO: support general case, create a conversion to the correct type. */
1300 return NULL;
1301
1302 /* Pattern detected. */
1303 if (dump_enabled_p ())
1304 {
1305 dump_printf_loc (MSG_NOTE, vect_location,
1306 "vect_recog_over_widening_pattern: detected: ");
1307 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1308 dump_printf (MSG_NOTE, "\n");
1309 }
1310
1311 return pattern_stmt;
1312 }
1313
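/* [Editorial sketch -- not part of the original source.]  A sequence the
   over-widening recognizer narrows.  C integer promotion performs the
   shift and the mask in int even though 8 bits would do; the pattern
   re-issues both operations on the narrower type:

     void
     narrow (unsigned char *a, unsigned char *b, int n)
     {
       for (int i = 0; i < n; i++)
         b[i] = (a[i] >> 1) & 0x7f;   // S2 widen, S3 shift, S4 mask,
                                      // S5 demote on the store
     }
*/
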
1314 /* Detect widening shift pattern:
1315
1316 type a_t;
1317 TYPE a_T, res_T;
1318
1319 S1 a_t = ;
1320 S2 a_T = (TYPE) a_t;
1321 S3 res_T = a_T << CONST;
1322
1323 where type 'TYPE' is at least double the size of type 'type'.
1324
1325 Also detect cases where the shift result is immediately converted
1326 to another type 'result_type' that is no larger in size than 'TYPE'.
1327 In those cases we perform a widen-shift that directly results in
1328 'result_type', to avoid a possible over-widening situation:
1329
1330 type a_t;
1331 TYPE a_T, res_T;
1332 result_type res_result;
1333
1334 S1 a_t = ;
1335 S2 a_T = (TYPE) a_t;
1336 S3 res_T = a_T << CONST;
1337 S4 res_result = (result_type) res_T;
1338 '--> res_result' = a_t w<< CONST;
1339
1340 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1341 create an additional pattern stmt for S2 to create a variable of an
1342 intermediate type, and perform widen-shift on the intermediate type:
1343
1344 type a_t;
1345 interm_type a_it;
1346 TYPE a_T, res_T, res_T';
1347
1348 S1 a_t = ;
1349 S2 a_T = (TYPE) a_t;
1350 '--> a_it = (interm_type) a_t;
1351 S3 res_T = a_T << CONST;
1352 '--> res_T' = a_it <<* CONST;
1353
1354 Input/Output:
1355
1356 * STMTS: Contains a stmt from which the pattern search begins.
1357 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1358 in STMTS. When an intermediate type is used and a pattern statement is
1359 created for S2, we also put S2 here (before S3).
1360
1361 Output:
1362
1363 * TYPE_IN: The type of the input arguments to the pattern.
1364
1365 * TYPE_OUT: The type of the output of this pattern.
1366
1367 * Return value: A new stmt that will be used to replace the sequence of
1368 stmts that constitute the pattern. In this case it will be:
1369 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1370
1371 static gimple
1372 vect_recog_widen_shift_pattern (vec<gimple> *stmts,
1373 tree *type_in, tree *type_out)
1374 {
1375 gimple last_stmt = stmts->pop ();
1376 gimple def_stmt0;
1377 tree oprnd0, oprnd1;
1378 tree type, half_type0;
1379 gimple pattern_stmt;
1380 tree vectype, vectype_out = NULL_TREE;
1381 tree var;
1382 enum tree_code dummy_code;
1383 int dummy_int;
1384 vec<tree> dummy_vec;
1385 gimple use_stmt;
1386 bool promotion;
1387
1388 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1389 return NULL;
1390
1391 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1392 return NULL;
1393
1394 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1395 return NULL;
1396
1397 oprnd0 = gimple_assign_rhs1 (last_stmt);
1398 oprnd1 = gimple_assign_rhs2 (last_stmt);
1399 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1400 return NULL;
1401
1402 /* Check operand 0: it has to be defined by a type promotion. */
1403 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1404 &promotion)
1405 || !promotion)
1406 return NULL;
1407
1408 /* Check operand 1: has to be positive. We check that it fits the type
1409 in vect_handle_widen_op_by_const (). */
1410 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1411 return NULL;
1412
1413 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1414 type = gimple_expr_type (last_stmt);
1415
1416 /* Check for subsequent conversion to another type. */
1417 use_stmt = vect_single_imm_use (last_stmt);
1418 if (use_stmt && is_gimple_assign (use_stmt)
1419 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1420 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1421 {
1422 tree use_lhs = gimple_assign_lhs (use_stmt);
1423 tree use_type = TREE_TYPE (use_lhs);
1424
1425 if (INTEGRAL_TYPE_P (use_type)
1426 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1427 {
1428 last_stmt = use_stmt;
1429 type = use_type;
1430 }
1431 }
1432
1433   /* Check if this is a widening operation. */
1434 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1435 &oprnd0, stmts,
1436 type, &half_type0, def_stmt0))
1437 return NULL;
1438
1439 /* Pattern detected. */
1440 if (dump_enabled_p ())
1441 dump_printf_loc (MSG_NOTE, vect_location,
1442 "vect_recog_widen_shift_pattern: detected:\n");
1443
1444 /* Check target support. */
1445 vectype = get_vectype_for_scalar_type (half_type0);
1446 vectype_out = get_vectype_for_scalar_type (type);
1447
1448 if (!vectype
1449 || !vectype_out
1450 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1451 vectype_out, vectype,
1452 &dummy_code, &dummy_code,
1453 &dummy_int, &dummy_vec))
1454 return NULL;
1455
1456 *type_in = vectype;
1457 *type_out = vectype_out;
1458
1459 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1460 var = vect_recog_temp_ssa_var (type, NULL);
1461 pattern_stmt =
1462 gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);
1463
1464 if (dump_enabled_p ())
1465 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1466
1467 stmts->safe_push (last_stmt);
1468 return pattern_stmt;
1469 }
1470
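/* [Editorial sketch -- not part of the original source.]  The
   widening-shift shape in C: the promotion of a[i] to int is S2, the
   shift by a constant is S3, and on targets with a widening shift
   (WIDEN_LSHIFT_EXPR) both fuse into one operation:

     void
     wshift (short *a, int *b, int n)
     {
       for (int i = 0; i < n; i++)
         b[i] = a[i] << 8;       // (int) a[i] << 8 -> a_t w<< 8
     }
*/
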
1471 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
1472
1473 type a_t, b_t, c_t;
1474
1475 S0 a_t = b_t r<< c_t;
1476
1477 Input/Output:
1478
1479 * STMTS: Contains a stmt from which the pattern search begins,
1480 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1481 with a sequence:
1482
1483 S1 d_t = -c_t;
1484 S2 e_t = d_t & (B - 1);
1485 S3 f_t = b_t << c_t;
1486 S4 g_t = b_t >> e_t;
1487 S0 a_t = f_t | g_t;
1488
1489    where B is the element bitsize of type.
1490
1491 Output:
1492
1493 * TYPE_IN: The type of the input arguments to the pattern.
1494
1495 * TYPE_OUT: The type of the output of this pattern.
1496
1497 * Return value: A new stmt that will be used to replace the rotate
1498 S0 stmt. */
1499
1500 static gimple
1501 vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
1502 {
1503 gimple last_stmt = stmts->pop ();
1504 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1505 gimple pattern_stmt, def_stmt;
1506 enum tree_code rhs_code;
1507 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1508 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1509 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1510 enum vect_def_type dt;
1511 optab optab1, optab2;
1512 edge ext_def = NULL;
1513
1514 if (!is_gimple_assign (last_stmt))
1515 return NULL;
1516
1517 rhs_code = gimple_assign_rhs_code (last_stmt);
1518 switch (rhs_code)
1519 {
1520 case LROTATE_EXPR:
1521 case RROTATE_EXPR:
1522 break;
1523 default:
1524 return NULL;
1525 }
1526
1527 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1528 return NULL;
1529
1530 lhs = gimple_assign_lhs (last_stmt);
1531 oprnd0 = gimple_assign_rhs1 (last_stmt);
1532 type = TREE_TYPE (oprnd0);
1533 oprnd1 = gimple_assign_rhs2 (last_stmt);
1534 if (TREE_CODE (oprnd0) != SSA_NAME
1535 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1536 || !INTEGRAL_TYPE_P (type)
1537 || !TYPE_UNSIGNED (type))
1538 return NULL;
1539
1540 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1541 &def, &dt))
1542 return NULL;
1543
1544 if (dt != vect_internal_def
1545 && dt != vect_constant_def
1546 && dt != vect_external_def)
1547 return NULL;
1548
1549 vectype = get_vectype_for_scalar_type (type);
1550 if (vectype == NULL_TREE)
1551 return NULL;
1552
1553 /* If vector/vector or vector/scalar rotate is supported by the target,
1554 don't do anything here. */
1555 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1556 if (optab1
1557 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1558 return NULL;
1559
1560 if (bb_vinfo != NULL || dt != vect_internal_def)
1561 {
1562 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1563 if (optab2
1564 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1565 return NULL;
1566 }
1567
1568 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1569 don't do anything here either. */
1570 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1571 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1572 if (!optab1
1573 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1574 || !optab2
1575 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1576 {
1577 if (bb_vinfo == NULL && dt == vect_internal_def)
1578 return NULL;
1579 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1580 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1581 if (!optab1
1582 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1583 || !optab2
1584 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1585 return NULL;
1586 }
1587
1588 *type_in = vectype;
1589 *type_out = vectype;
1590 if (*type_in == NULL_TREE)
1591 return NULL;
1592
1593 if (dt == vect_external_def
1594 && TREE_CODE (oprnd1) == SSA_NAME
1595 && loop_vinfo)
1596 {
1597 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1598 ext_def = loop_preheader_edge (loop);
1599 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1600 {
1601 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1602 if (bb == NULL
1603 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1604 ext_def = NULL;
1605 }
1606 }
1607
1608 def = NULL_TREE;
1609 if (TREE_CODE (oprnd1) == INTEGER_CST
1610 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1611 def = oprnd1;
1612 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1613 {
1614 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1615 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1616 && TYPE_PRECISION (TREE_TYPE (rhs1))
1617 == TYPE_PRECISION (type))
1618 def = rhs1;
1619 }
1620
1621 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1622 if (def == NULL_TREE)
1623 {
1624 def = vect_recog_temp_ssa_var (type, NULL);
1625 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1626 NULL_TREE);
1627 if (ext_def)
1628 {
1629 basic_block new_bb
1630 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1631 gcc_assert (!new_bb);
1632 }
1633 else
1634 append_pattern_def_seq (stmt_vinfo, def_stmt);
1635 }
1636 stype = TREE_TYPE (def);
1637
1638 if (TREE_CODE (def) == INTEGER_CST)
1639 {
1640 if (!tree_fits_uhwi_p (def)
1641 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
1642 || integer_zerop (def))
1643 return NULL;
1644 def2 = build_int_cst (stype,
1645 GET_MODE_PRECISION (TYPE_MODE (type))
1646 - tree_to_uhwi (def));
1647 }
1648 else
1649 {
1650 tree vecstype = get_vectype_for_scalar_type (stype);
1651 stmt_vec_info def_stmt_vinfo;
1652
1653 if (vecstype == NULL_TREE)
1654 return NULL;
1655 def2 = vect_recog_temp_ssa_var (stype, NULL);
1656 def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
1657 NULL_TREE);
1658 if (ext_def)
1659 {
1660 basic_block new_bb
1661 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1662 gcc_assert (!new_bb);
1663 }
1664 else
1665 {
1666 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1667 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1668 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1669 append_pattern_def_seq (stmt_vinfo, def_stmt);
1670 }
1671
1672 def2 = vect_recog_temp_ssa_var (stype, NULL);
1673 tree mask
1674 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
1675 def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
1676 gimple_assign_lhs (def_stmt),
1677 mask);
1678 if (ext_def)
1679 {
1680 basic_block new_bb
1681 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1682 gcc_assert (!new_bb);
1683 }
1684 else
1685 {
1686 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1687 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1688 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1689 append_pattern_def_seq (stmt_vinfo, def_stmt);
1690 }
1691 }
1692
1693 var1 = vect_recog_temp_ssa_var (type, NULL);
1694 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
1695 ? LSHIFT_EXPR : RSHIFT_EXPR,
1696 var1, oprnd0, def);
1697 append_pattern_def_seq (stmt_vinfo, def_stmt);
1698
1699 var2 = vect_recog_temp_ssa_var (type, NULL);
1700 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
1701 ? RSHIFT_EXPR : LSHIFT_EXPR,
1702 var2, oprnd0, def2);
1703 append_pattern_def_seq (stmt_vinfo, def_stmt);
1704
1705 /* Pattern detected. */
1706 if (dump_enabled_p ())
1707 dump_printf_loc (MSG_NOTE, vect_location,
1708 "vect_recog_rotate_pattern: detected:\n");
1709
1710 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1711 var = vect_recog_temp_ssa_var (type, NULL);
1712 pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);
1713
1714 if (dump_enabled_p ())
1715 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1716
1717 stmts->safe_push (last_stmt);
1718 return pattern_stmt;
1719 }
1720
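/* [Editorial sketch -- not part of the original source.]  The expansion
   above written out in C for a 32-bit unsigned rotate-left.  Negating
   and masking the count (S1/S2) keeps a count of zero well defined:

     unsigned int
     rotl32 (unsigned int b, unsigned int c)   // assumes c < 32, as the
     {                                         // original rotate guarantees
       unsigned int d = -c;                    // S1
       unsigned int e = d & 31;                // S2: B - 1 with B == 32
       unsigned int f = b << c;                // S3
       unsigned int g = b >> e;                // S4
       return f | g;                           // S0
     }
*/
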
1721 /* Detect a vector by vector shift pattern that wouldn't be otherwise
1722 vectorized:
1723
1724 type a_t;
1725 TYPE b_T, res_T;
1726
1727 S1 a_t = ;
1728 S2 b_T = ;
1729 S3 res_T = b_T op a_t;
1730
1731    where type 'TYPE' is a type with a different size than 'type',
1732 and op is <<, >> or rotate.
1733
1734 Also detect cases:
1735
1736 type a_t;
1737 TYPE b_T, c_T, res_T;
1738
1739 S0 c_T = ;
1740 S1 a_t = (type) c_T;
1741 S2 b_T = ;
1742 S3 res_T = b_T op a_t;
1743
1744 Input/Output:
1745
1746 * STMTS: Contains a stmt from which the pattern search begins,
1747 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
1748      with a shift/rotate which has the same type on both operands; in the
1749      second case just b_T op c_T, in the first case with an added cast
1750      from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
1751
1752 Output:
1753
1754 * TYPE_IN: The type of the input arguments to the pattern.
1755
1756 * TYPE_OUT: The type of the output of this pattern.
1757
1758 * Return value: A new stmt that will be used to replace the shift/rotate
1759 S3 stmt. */
1760
1761 static gimple
1762 vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
1763 tree *type_in, tree *type_out)
1764 {
1765 gimple last_stmt = stmts->pop ();
1766 tree oprnd0, oprnd1, lhs, var;
1767 gimple pattern_stmt, def_stmt;
1768 enum tree_code rhs_code;
1769 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1770 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1771 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1772 enum vect_def_type dt;
1773 tree def;
1774
1775 if (!is_gimple_assign (last_stmt))
1776 return NULL;
1777
1778 rhs_code = gimple_assign_rhs_code (last_stmt);
1779 switch (rhs_code)
1780 {
1781 case LSHIFT_EXPR:
1782 case RSHIFT_EXPR:
1783 case LROTATE_EXPR:
1784 case RROTATE_EXPR:
1785 break;
1786 default:
1787 return NULL;
1788 }
1789
1790 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1791 return NULL;
1792
1793 lhs = gimple_assign_lhs (last_stmt);
1794 oprnd0 = gimple_assign_rhs1 (last_stmt);
1795 oprnd1 = gimple_assign_rhs2 (last_stmt);
1796 if (TREE_CODE (oprnd0) != SSA_NAME
1797 || TREE_CODE (oprnd1) != SSA_NAME
1798 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
1799 || TYPE_PRECISION (TREE_TYPE (oprnd1))
1800 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
1801 || TYPE_PRECISION (TREE_TYPE (lhs))
1802 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
1803 return NULL;
1804
1805 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1806 &def, &dt))
1807 return NULL;
1808
1809 if (dt != vect_internal_def)
1810 return NULL;
1811
1812 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
1813 *type_out = *type_in;
1814 if (*type_in == NULL_TREE)
1815 return NULL;
1816
1817 def = NULL_TREE;
1818 if (gimple_assign_cast_p (def_stmt))
1819 {
1820 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1821 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
1822 && TYPE_PRECISION (TREE_TYPE (rhs1))
1823 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
1824 def = rhs1;
1825 }
1826
1827 if (def == NULL_TREE)
1828 {
1829 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1830 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1831 NULL_TREE);
1832 new_pattern_def_seq (stmt_vinfo, def_stmt);
1833 }
1834
1835 /* Pattern detected. */
1836 if (dump_enabled_p ())
1837 dump_printf_loc (MSG_NOTE, vect_location,
1838 "vect_recog_vector_vector_shift_pattern: detected:\n");
1839
1840 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1841 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1842 pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
1843
1844 if (dump_enabled_p ())
1845 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1846
1847 stmts->safe_push (last_stmt);
1848 return pattern_stmt;
1849 }
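/* A worked instance of the above (illustrative, assuming 8-bit 'char'
   and 32-bit 'int'): for

     char a_t;
     int b_T, res_T;
     res_T = b_T << a_t;

   the pattern emits

     def = (int) a_t;      <-- added to STMT_VINFO_PATTERN_DEF_SEQ
     res_T = b_T << def;   <-- pattern stmt, both operands 32-bit

   so a single vector-by-vector shift can be used.  */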
1850
1851 /* Detect a signed division by a constant that wouldn't be
1852 otherwise vectorized:
1853
1854 type a_t, b_t;
1855
1856 S1 a_t = b_t / N;
1857
1858 where type 'type' is an integral type and N is a constant.
1859
1860 Similarly handle modulo by a constant:
1861
1862 S4 a_t = b_t % N;
1863
1864 Input/Output:
1865
1866 * STMTS: Contains a stmt from which the pattern search begins,
1867 i.e. the division stmt. S1 is replaced, if N is a power-of-two
1868 constant and the type is signed, by:
1869 S3 y_t = b_t < 0 ? N - 1 : 0;
1870 S2 x_t = b_t + y_t;
1871 S1' a_t = x_t >> log2 (N);
1872
1873 S4 is replaced, if N is a power-of-two constant and the type is
1874 signed, by (where the *_T temporaries have unsigned type):
1875 S9 y_T = b_t < 0 ? -1U : 0U;
1876 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1877 S7 z_t = (type) z_T;
1878 S6 w_t = b_t + z_t;
1879 S5 x_t = w_t & (N - 1);
1880 S4' a_t = x_t - z_t;
1881
1882 Output:
1883
1884 * TYPE_IN: The type of the input arguments to the pattern.
1885
1886 * TYPE_OUT: The type of the output of this pattern.
1887
1888 * Return value: A new stmt that will be used to replace the division
1889 S1 or modulo S4 stmt. */
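/* For instance (illustrative), with 32-bit signed b_t and N = 4 the
   division S1 becomes

     S3  y_t = b_t < 0 ? 3 : 0;
     S2  x_t = b_t + y_t;
     S1' a_t = x_t >> 2;

   and the modulo S4 becomes (with 32-bit unsigned temporaries)

     S9  y_T = b_t < 0 ? 0xffffffffU : 0U;
     S8  z_T = y_T >> 30;
     S7  z_t = (type) z_T;
     S6  w_t = b_t + z_t;
     S5  x_t = w_t & 3;
     S4' a_t = x_t - z_t;  */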
1890
1891 static gimple
1892 vect_recog_divmod_pattern (vec<gimple> *stmts,
1893 tree *type_in, tree *type_out)
1894 {
1895 gimple last_stmt = stmts->pop ();
1896 tree oprnd0, oprnd1, vectype, itype, cond;
1897 gimple pattern_stmt, def_stmt;
1898 enum tree_code rhs_code;
1899 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1900 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1901 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1902 optab optab;
1903 tree q;
1904 int dummy_int, prec;
1905 stmt_vec_info def_stmt_vinfo;
1906
1907 if (!is_gimple_assign (last_stmt))
1908 return NULL;
1909
1910 rhs_code = gimple_assign_rhs_code (last_stmt);
1911 switch (rhs_code)
1912 {
1913 case TRUNC_DIV_EXPR:
1914 case TRUNC_MOD_EXPR:
1915 break;
1916 default:
1917 return NULL;
1918 }
1919
1920 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1921 return NULL;
1922
1923 oprnd0 = gimple_assign_rhs1 (last_stmt);
1924 oprnd1 = gimple_assign_rhs2 (last_stmt);
1925 itype = TREE_TYPE (oprnd0);
1926 if (TREE_CODE (oprnd0) != SSA_NAME
1927 || TREE_CODE (oprnd1) != INTEGER_CST
1928 || TREE_CODE (itype) != INTEGER_TYPE
1929 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
1930 return NULL;
1931
1932 vectype = get_vectype_for_scalar_type (itype);
1933 if (vectype == NULL_TREE)
1934 return NULL;
1935
1936 /* If the target can handle vectorized division or modulo natively,
1937 don't attempt to optimize this. */
1938 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
1939 if (optab != unknown_optab)
1940 {
1941 enum machine_mode vec_mode = TYPE_MODE (vectype);
1942 int icode = (int) optab_handler (optab, vec_mode);
1943 if (icode != CODE_FOR_nothing)
1944 return NULL;
1945 }
1946
1947 prec = TYPE_PRECISION (itype);
1948 if (integer_pow2p (oprnd1))
1949 {
1950 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
1951 return NULL;
1952
1953 /* Pattern detected. */
1954 if (dump_enabled_p ())
1955 dump_printf_loc (MSG_NOTE, vect_location,
1956 "vect_recog_divmod_pattern: detected:\n");
1957
1958 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
1959 build_int_cst (itype, 0));
1960 if (rhs_code == TRUNC_DIV_EXPR)
1961 {
1962 tree var = vect_recog_temp_ssa_var (itype, NULL);
1963 tree shift;
1964 def_stmt
1965 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
1966 fold_build2 (MINUS_EXPR, itype,
1967 oprnd1,
1968 build_int_cst (itype,
1969 1)),
1970 build_int_cst (itype, 0));
1971 new_pattern_def_seq (stmt_vinfo, def_stmt);
1972 var = vect_recog_temp_ssa_var (itype, NULL);
1973 def_stmt
1974 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
1975 gimple_assign_lhs (def_stmt));
1976 append_pattern_def_seq (stmt_vinfo, def_stmt);
1977
1978 shift = build_int_cst (itype, tree_log2 (oprnd1));
1979 pattern_stmt
1980 = gimple_build_assign_with_ops (RSHIFT_EXPR,
1981 vect_recog_temp_ssa_var (itype,
1982 NULL),
1983 var, shift);
1984 }
1985 else
1986 {
1987 tree signmask;
1988 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1989 if (compare_tree_int (oprnd1, 2) == 0)
1990 {
1991 signmask = vect_recog_temp_ssa_var (itype, NULL);
1992 def_stmt
1993 = gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
1994 build_int_cst (itype, 1),
1995 build_int_cst (itype, 0));
1996 append_pattern_def_seq (stmt_vinfo, def_stmt);
1997 }
1998 else
1999 {
2000 tree utype
2001 = build_nonstandard_integer_type (prec, 1);
2002 tree vecutype = get_vectype_for_scalar_type (utype);
2003 tree shift
2004 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2005 - tree_log2 (oprnd1));
2006 tree var = vect_recog_temp_ssa_var (utype, NULL);
2007
2008 def_stmt
2009 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2010 build_int_cst (utype, -1),
2011 build_int_cst (utype, 0));
2012 def_stmt_vinfo
2013 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2014 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2015 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2016 append_pattern_def_seq (stmt_vinfo, def_stmt);
2017 var = vect_recog_temp_ssa_var (utype, NULL);
2018 def_stmt
2019 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
2020 gimple_assign_lhs (def_stmt),
2021 shift);
2022 def_stmt_vinfo
2023 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2024 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2025 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2026 append_pattern_def_seq (stmt_vinfo, def_stmt);
2027 signmask = vect_recog_temp_ssa_var (itype, NULL);
2028 def_stmt
2029 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
2030 NULL_TREE);
2031 append_pattern_def_seq (stmt_vinfo, def_stmt);
2032 }
2033 def_stmt
2034 = gimple_build_assign_with_ops (PLUS_EXPR,
2035 vect_recog_temp_ssa_var (itype,
2036 NULL),
2037 oprnd0, signmask);
2038 append_pattern_def_seq (stmt_vinfo, def_stmt);
2039 def_stmt
2040 = gimple_build_assign_with_ops (BIT_AND_EXPR,
2041 vect_recog_temp_ssa_var (itype,
2042 NULL),
2043 gimple_assign_lhs (def_stmt),
2044 fold_build2 (MINUS_EXPR, itype,
2045 oprnd1,
2046 build_int_cst (itype,
2047 1)));
2048 append_pattern_def_seq (stmt_vinfo, def_stmt);
2049
2050 pattern_stmt
2051 = gimple_build_assign_with_ops (MINUS_EXPR,
2052 vect_recog_temp_ssa_var (itype,
2053 NULL),
2054 gimple_assign_lhs (def_stmt),
2055 signmask);
2056 }
2057
2058 if (dump_enabled_p ())
2059 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2060 0);
2061
2062 stmts->safe_push (last_stmt);
2063
2064 *type_in = vectype;
2065 *type_out = vectype;
2066 return pattern_stmt;
2067 }
2068
2069 if (prec > HOST_BITS_PER_WIDE_INT
2070 || integer_zerop (oprnd1))
2071 return NULL;
2072
2073 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2074 return NULL;
2075
2076 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2077
2078 if (TYPE_UNSIGNED (itype))
2079 {
2080 unsigned HOST_WIDE_INT mh, ml;
2081 int pre_shift, post_shift;
2082 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2083 & GET_MODE_MASK (TYPE_MODE (itype)));
2084 tree t1, t2, t3, t4;
2085
2086 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2087 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2088 return NULL;
2089
2090 /* Find a suitable multiplier and right shift count so that the
2091 division by D can be done as a highpart multiply and shifts. */
2092 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2093
2094 /* If the suggested multiplier is more than PREC bits wide, we can do better
2095 for even divisors, using an initial right shift. */
2096 if (mh != 0 && (d & 1) == 0)
2097 {
2098 pre_shift = floor_log2 (d & -d);
2099 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2100 &ml, &post_shift, &dummy_int);
2101 gcc_assert (!mh);
2102 }
2103 else
2104 pre_shift = 0;
2105
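/* Illustrative values for 32-bit unsigned division: for d = 3 the
   multiplier fits and choose_multiplier gives mh = 0,
   ml = 0xaaaaaaab, post_shift = 1, so the short sequence
     t1 = oprnd0 h* 0xaaaaaaab;  q = t1 >> 1;
   is used. For d = 7 the multiplier does not fit (mh != 0),
   ml = 0x24924925, post_shift = 3, and the longer sequence below
     t1 = oprnd0 h* 0x24924925;  t2 = oprnd0 - t1;  t3 = t2 >> 1;
     t4 = t1 + t3;  q = t4 >> 2;
   is used instead.  */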
2106 if (mh != 0)
2107 {
2108 if (post_shift - 1 >= prec)
2109 return NULL;
2110
2111 /* t1 = oprnd0 h* ml;
2112 t2 = oprnd0 - t1;
2113 t3 = t2 >> 1;
2114 t4 = t1 + t3;
2115 q = t4 >> (post_shift - 1); */
2116 t1 = vect_recog_temp_ssa_var (itype, NULL);
2117 def_stmt
2118 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2119 build_int_cst (itype, ml));
2120 append_pattern_def_seq (stmt_vinfo, def_stmt);
2121
2122 t2 = vect_recog_temp_ssa_var (itype, NULL);
2123 def_stmt
2124 = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
2125 append_pattern_def_seq (stmt_vinfo, def_stmt);
2126
2127 t3 = vect_recog_temp_ssa_var (itype, NULL);
2128 def_stmt
2129 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2130 integer_one_node);
2131 append_pattern_def_seq (stmt_vinfo, def_stmt);
2132
2133 t4 = vect_recog_temp_ssa_var (itype, NULL);
2134 def_stmt
2135 = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
2136
2137 if (post_shift != 1)
2138 {
2139 append_pattern_def_seq (stmt_vinfo, def_stmt);
2140
2141 q = vect_recog_temp_ssa_var (itype, NULL);
2142 pattern_stmt
2143 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
2144 build_int_cst (itype,
2145 post_shift
2146 - 1));
2147 }
2148 else
2149 {
2150 q = t4;
2151 pattern_stmt = def_stmt;
2152 }
2153 }
2154 else
2155 {
2156 if (pre_shift >= prec || post_shift >= prec)
2157 return NULL;
2158
2159 /* t1 = oprnd0 >> pre_shift;
2160 t2 = t1 h* ml;
2161 q = t2 >> post_shift; */
2162 if (pre_shift)
2163 {
2164 t1 = vect_recog_temp_ssa_var (itype, NULL);
2165 def_stmt
2166 = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
2167 build_int_cst (NULL,
2168 pre_shift));
2169 append_pattern_def_seq (stmt_vinfo, def_stmt);
2170 }
2171 else
2172 t1 = oprnd0;
2173
2174 t2 = vect_recog_temp_ssa_var (itype, NULL);
2175 def_stmt
2176 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
2177 build_int_cst (itype, ml));
2178
2179 if (post_shift)
2180 {
2181 append_pattern_def_seq (stmt_vinfo, def_stmt);
2182
2183 q = vect_recog_temp_ssa_var (itype, NULL);
2184 def_stmt
2185 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
2186 build_int_cst (itype,
2187 post_shift));
2188 }
2189 else
2190 q = t2;
2191
2192 pattern_stmt = def_stmt;
2193 }
2194 }
2195 else
2196 {
2197 unsigned HOST_WIDE_INT ml;
2198 int post_shift;
2199 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2200 unsigned HOST_WIDE_INT abs_d;
2201 bool add = false;
2202 tree t1, t2, t3, t4;
2203
2204 /* Give up for -1. */
2205 if (d == -1)
2206 return NULL;
2207
2208 /* Since d might be INT_MIN, we have to cast to
2209 unsigned HOST_WIDE_INT before negating to avoid
2210 undefined signed overflow. */
2211 abs_d = (d >= 0
2212 ? (unsigned HOST_WIDE_INT) d
2213 : - (unsigned HOST_WIDE_INT) d);
2214
2215 /* n rem d = n rem -d */
2216 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2217 {
2218 d = abs_d;
2219 oprnd1 = build_int_cst (itype, abs_d);
2220 }
2221 else if (HOST_BITS_PER_WIDE_INT >= prec
2222 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2223 /* This case is not handled correctly below. */
2224 return NULL;
2225
2226 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2227 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2228 {
2229 add = true;
2230 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2231 }
2232 if (post_shift >= prec)
2233 return NULL;
2234
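/* Illustrative values for 32-bit signed division by d = 3:
   choose_multiplier gives ml = 0x55555556 and post_shift = 0, so
   with no range information on oprnd0 the sequence built below is
     t1 = oprnd0 h* 0x55555556;
     t4 = oprnd0 >> 31;
     q  = t1 - t4;  */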
2235 /* t1 = oprnd0 h* ml; */
2236 t1 = vect_recog_temp_ssa_var (itype, NULL);
2237 def_stmt
2238 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2239 build_int_cst (itype, ml));
2240
2241 if (add)
2242 {
2243 /* t2 = t1 + oprnd0; */
2244 append_pattern_def_seq (stmt_vinfo, def_stmt);
2245 t2 = vect_recog_temp_ssa_var (itype, NULL);
2246 def_stmt
2247 = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
2248 }
2249 else
2250 t2 = t1;
2251
2252 if (post_shift)
2253 {
2254 /* t3 = t2 >> post_shift; */
2255 append_pattern_def_seq (stmt_vinfo, def_stmt);
2256 t3 = vect_recog_temp_ssa_var (itype, NULL);
2257 def_stmt
2258 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2259 build_int_cst (itype, post_shift));
2260 }
2261 else
2262 t3 = t2;
2263
2264 widest_int oprnd0_min, oprnd0_max;
2265 int msb = 1;
2266 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2267 {
2268 if (!wi::neg_p (oprnd0_min))
2269 msb = 0;
2270 else if (wi::neg_p (oprnd0_max))
2271 msb = -1;
2272 }
2273
2274 if (msb == 0 && d >= 0)
2275 {
2276 /* q = t3; */
2277 q = t3;
2278 pattern_stmt = def_stmt;
2279 }
2280 else
2281 {
2282 /* t4 = oprnd0 >> (prec - 1);
2283 or if we know from VRP that oprnd0 >= 0
2284 t4 = 0;
2285 or if we know from VRP that oprnd0 < 0
2286 t4 = -1; */
2287 append_pattern_def_seq (stmt_vinfo, def_stmt);
2288 t4 = vect_recog_temp_ssa_var (itype, NULL);
2289 if (msb != 1)
2290 def_stmt
2291 = gimple_build_assign_with_ops (INTEGER_CST,
2292 t4, build_int_cst (itype, msb),
2293 NULL_TREE);
2294 else
2295 def_stmt
2296 = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
2297 build_int_cst (itype, prec - 1));
2298 append_pattern_def_seq (stmt_vinfo, def_stmt);
2299
2300 /* q = t3 - t4; or q = t4 - t3; */
2301 q = vect_recog_temp_ssa_var (itype, NULL);
2302 pattern_stmt
2303 = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2304 d < 0 ? t3 : t4);
2305 }
2306 }
2307
2308 if (rhs_code == TRUNC_MOD_EXPR)
2309 {
2310 tree r, t1;
2311
2312 /* We divided. Now finish by:
2313 t1 = q * oprnd1;
2314 r = oprnd0 - t1; */
2315 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2316
2317 t1 = vect_recog_temp_ssa_var (itype, NULL);
2318 def_stmt
2319 = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
2320 append_pattern_def_seq (stmt_vinfo, def_stmt);
2321
2322 r = vect_recog_temp_ssa_var (itype, NULL);
2323 pattern_stmt
2324 = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
2325 }
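/* E.g. for oprnd1 = 3 the finishing step above computes
   r = oprnd0 - 3 * (oprnd0 / 3), reusing the quotient Q from the
   division sequence.  */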
2326
2327 /* Pattern detected. */
2328 if (dump_enabled_p ())
2329 {
2330 dump_printf_loc (MSG_NOTE, vect_location,
2331 "vect_recog_divmod_pattern: detected: ");
2332 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2333 dump_printf (MSG_NOTE, "\n");
2334 }
2335
2336 stmts->safe_push (last_stmt);
2337
2338 *type_in = vectype;
2339 *type_out = vectype;
2340 return pattern_stmt;
2341 }
2342
2343 /* Function vect_recog_mixed_size_cond_pattern
2344
2345 Try to find the following pattern:
2346
2347 type x_t, y_t;
2348 TYPE a_T, b_T, c_T;
2349 loop:
2350 S1 a_T = x_t CMP y_t ? b_T : c_T;
2351
2352 where type 'TYPE' is an integral type which has a different size
2353 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2354 than 'type', the constants need to fit into an integer type
2355 with the same width as 'type') or results of conversion from 'type'.
2356
2357 Input:
2358
2359 * LAST_STMT: A stmt from which the pattern search begins.
2360
2361 Output:
2362
2363 * TYPE_IN: The type of the input arguments to the pattern.
2364
2365 * TYPE_OUT: The type of the output of this pattern.
2366
2367 * Return value: A new stmt that will be used to replace the pattern.
2368 Additionally a def_stmt is added to STMT_VINFO_PATTERN_DEF_SEQ.
2369
2370 a_it = x_t CMP y_t ? b_it : c_it;
2371 a_T = (TYPE) a_it; */
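/* For instance (illustrative), assuming 16-bit 'short' and 32-bit
   'int':

     short x_t, y_t;
     int a_T;
     S1 a_T = x_t < y_t ? 5 : 0;

   is rewritten with a 16-bit intermediate a_it:

     a_it = x_t < y_t ? 5 : 0;
     a_T = (int) a_it;

   so the COND_EXPR can be done in the comparison's vector type.  */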
2372
2373 static gimple
2374 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2375 tree *type_out)
2376 {
2377 gimple last_stmt = (*stmts)[0];
2378 tree cond_expr, then_clause, else_clause;
2379 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2380 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2381 enum machine_mode cmpmode;
2382 gimple pattern_stmt, def_stmt;
2383 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2384 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2385 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2386 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2387 bool promotion;
2388 tree comp_scalar_type;
2389
2390 if (!is_gimple_assign (last_stmt)
2391 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2392 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2393 return NULL;
2394
2395 cond_expr = gimple_assign_rhs1 (last_stmt);
2396 then_clause = gimple_assign_rhs2 (last_stmt);
2397 else_clause = gimple_assign_rhs3 (last_stmt);
2398
2399 if (!COMPARISON_CLASS_P (cond_expr))
2400 return NULL;
2401
2402 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2403 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2404 if (comp_vectype == NULL_TREE)
2405 return NULL;
2406
2407 type = gimple_expr_type (last_stmt);
2408 if (types_compatible_p (type, comp_scalar_type)
2409 || ((TREE_CODE (then_clause) != INTEGER_CST
2410 || TREE_CODE (else_clause) != INTEGER_CST)
2411 && !INTEGRAL_TYPE_P (comp_scalar_type))
2412 || !INTEGRAL_TYPE_P (type))
2413 return NULL;
2414
2415 if ((TREE_CODE (then_clause) != INTEGER_CST
2416 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2417 &def_stmt0, &promotion))
2418 || (TREE_CODE (else_clause) != INTEGER_CST
2419 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2420 &def_stmt1, &promotion)))
2421 return NULL;
2422
2423 if (orig_type0 && orig_type1
2424 && !types_compatible_p (orig_type0, orig_type1))
2425 return NULL;
2426
2427 if (orig_type0)
2428 {
2429 if (!types_compatible_p (orig_type0, comp_scalar_type))
2430 return NULL;
2431 then_clause = gimple_assign_rhs1 (def_stmt0);
2432 itype = orig_type0;
2433 }
2434
2435 if (orig_type1)
2436 {
2437 if (!types_compatible_p (orig_type1, comp_scalar_type))
2438 return NULL;
2439 else_clause = gimple_assign_rhs1 (def_stmt1);
2440 itype = orig_type1;
2441 }
2442
2443 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2444
2445 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2446 return NULL;
2447
2448 vectype = get_vectype_for_scalar_type (type);
2449 if (vectype == NULL_TREE)
2450 return NULL;
2451
2452 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2453 return NULL;
2454
2455 if (itype == NULL_TREE)
2456 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2457 TYPE_UNSIGNED (type));
2458
2459 if (itype == NULL_TREE
2460 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2461 return NULL;
2462
2463 vecitype = get_vectype_for_scalar_type (itype);
2464 if (vecitype == NULL_TREE)
2465 return NULL;
2466
2467 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2468 return NULL;
2469
2470 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2471 {
2472 if ((TREE_CODE (then_clause) == INTEGER_CST
2473 && !int_fits_type_p (then_clause, itype))
2474 || (TREE_CODE (else_clause) == INTEGER_CST
2475 && !int_fits_type_p (else_clause, itype)))
2476 return NULL;
2477 }
2478
2479 def_stmt
2480 = gimple_build_assign_with_ops (COND_EXPR,
2481 vect_recog_temp_ssa_var (itype, NULL),
2482 unshare_expr (cond_expr),
2483 fold_convert (itype, then_clause),
2484 fold_convert (itype, else_clause));
2485 pattern_stmt
2486 = gimple_build_assign_with_ops (NOP_EXPR,
2487 vect_recog_temp_ssa_var (type, NULL),
2488 gimple_assign_lhs (def_stmt), NULL_TREE);
2489
2490 new_pattern_def_seq (stmt_vinfo, def_stmt);
2491 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2492 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2493 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2494 *type_in = vecitype;
2495 *type_out = vectype;
2496
2497 if (dump_enabled_p ())
2498 dump_printf_loc (MSG_NOTE, vect_location,
2499 "vect_recog_mixed_size_cond_pattern: detected:\n");
2500
2501 return pattern_stmt;
2502 }
2503
2504
2505 /* Helper function of vect_recog_bool_pattern. Called recursively; returns
2506 true if bool VAR can be optimized that way. */
2507
2508 static bool
2509 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2510 {
2511 gimple def_stmt;
2512 enum vect_def_type dt;
2513 tree def, rhs1;
2514 enum tree_code rhs_code;
2515
2516 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2517 &dt))
2518 return false;
2519
2520 if (dt != vect_internal_def)
2521 return false;
2522
2523 if (!is_gimple_assign (def_stmt))
2524 return false;
2525
2526 if (!has_single_use (def))
2527 return false;
2528
2529 rhs1 = gimple_assign_rhs1 (def_stmt);
2530 rhs_code = gimple_assign_rhs_code (def_stmt);
2531 switch (rhs_code)
2532 {
2533 case SSA_NAME:
2534 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2535
2536 CASE_CONVERT:
2537 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2538 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2539 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2540 return false;
2541 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2542
2543 case BIT_NOT_EXPR:
2544 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2545
2546 case BIT_AND_EXPR:
2547 case BIT_IOR_EXPR:
2548 case BIT_XOR_EXPR:
2549 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2550 return false;
2551 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2552 bb_vinfo);
2553
2554 default:
2555 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2556 {
2557 tree vecitype, comp_vectype;
2558
2559 /* If the comparison can throw, then is_gimple_condexpr will be
2560 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2561 if (stmt_could_throw_p (def_stmt))
2562 return false;
2563
2564 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2565 if (comp_vectype == NULL_TREE)
2566 return false;
2567
2568 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2569 {
2570 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2571 tree itype
2572 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2573 vecitype = get_vectype_for_scalar_type (itype);
2574 if (vecitype == NULL_TREE)
2575 return false;
2576 }
2577 else
2578 vecitype = comp_vectype;
2579 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2580 }
2581 return false;
2582 }
2583 }
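/* For illustration: a chain such as

     a_b = x != 0;
     b_b = y != 0;
     var = a_b & b_b;

   is accepted when every def is an internal def with a single use
   and the target can expand the VEC_COND_EXPRs the comparisons will
   become (checked in the default: case above); a comparison that
   could throw is rejected.  */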
2584
2585
2586 /* Helper function of adjust_bool_pattern. Add a cast of the result of a
2587 previous stmt (SSA_NAME_DEF_STMT of VAR) to TYPE, by moving the COND_EXPR
2588 from RELATED_STMT to PATTERN_DEF_SEQ and making the cast the new RELATED_STMT. */
2589
2590 static tree
2591 adjust_bool_pattern_cast (tree type, tree var)
2592 {
2593 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2594 gimple cast_stmt, pattern_stmt;
2595
2596 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2597 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2598 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2599 cast_stmt
2600 = gimple_build_assign_with_ops (NOP_EXPR,
2601 vect_recog_temp_ssa_var (type, NULL),
2602 gimple_assign_lhs (pattern_stmt),
2603 NULL_TREE);
2604 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2605 return gimple_assign_lhs (cast_stmt);
2606 }
2607
2608
2609 /* Helper function of vect_recog_bool_pattern. Do the actual transformations
2610 recursively. VAR is an SSA_NAME that should be transformed from bool
2611 to a wider integer type. OUT_TYPE is the desired final integer type of
2612 the whole pattern. TRUEVAL should be NULL unless we are optimizing a
2613 BIT_AND_EXPR into a COND_EXPR that uses the integer result of one of the
2614 operands as its then_clause. STMTS is where statements that get added
2615 pattern stmts should be pushed to. */
2616
2617 static tree
2618 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2619 vec<gimple> *stmts)
2620 {
2621 gimple stmt = SSA_NAME_DEF_STMT (var);
2622 enum tree_code rhs_code, def_rhs_code;
2623 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2624 location_t loc;
2625 gimple pattern_stmt, def_stmt;
2626
2627 rhs1 = gimple_assign_rhs1 (stmt);
2628 rhs2 = gimple_assign_rhs2 (stmt);
2629 rhs_code = gimple_assign_rhs_code (stmt);
2630 loc = gimple_location (stmt);
2631 switch (rhs_code)
2632 {
2633 case SSA_NAME:
2634 CASE_CONVERT:
2635 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2636 itype = TREE_TYPE (irhs1);
2637 pattern_stmt
2638 = gimple_build_assign_with_ops (SSA_NAME,
2639 vect_recog_temp_ssa_var (itype, NULL),
2640 irhs1, NULL_TREE);
2641 break;
2642
2643 case BIT_NOT_EXPR:
2644 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2645 itype = TREE_TYPE (irhs1);
2646 pattern_stmt
2647 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2648 vect_recog_temp_ssa_var (itype, NULL),
2649 irhs1, build_int_cst (itype, 1));
2650 break;
2651
2652 case BIT_AND_EXPR:
2653 /* Try to optimize x = y & (a < b ? 1 : 0); into
2654 x = (a < b ? y : 0);
2655
2656 E.g. for:
2657 bool a_b, b_b, c_b;
2658 TYPE d_T;
2659
2660 S1 a_b = x1 CMP1 y1;
2661 S2 b_b = x2 CMP2 y2;
2662 S3 c_b = a_b & b_b;
2663 S4 d_T = (TYPE) c_b;
2664
2665 we would normally emit:
2666
2667 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2668 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2669 S3' c_T = a_T & b_T;
2670 S4' d_T = c_T;
2671
2672 but we can save one stmt by using the
2673 result of one of the COND_EXPRs in the other COND_EXPR and leave the
2674 BIT_AND_EXPR stmt out:
2675
2676 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2677 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2678 S4' d_T = c_T;
2679
2680 At least when VEC_COND_EXPR is implemented using masks,
2681 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
2682 computes the comparison mask and ANDs it, in one case with an
2683 all-ones vector, in the other case with a vector register.
2684 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2685 often more expensive. */
2686 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2687 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2688 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2689 {
2690 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2691 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2692 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2693 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2694 {
2695 gimple tstmt;
2696 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2697 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2698 tstmt = stmts->pop ();
2699 gcc_assert (tstmt == def_stmt);
2700 stmts->quick_push (stmt);
2701 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2702 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2703 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2704 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2705 return irhs2;
2706 }
2707 else
2708 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2709 goto and_ior_xor;
2710 }
2711 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2712 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2713 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2714 {
2715 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2716 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2717 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2718 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2719 {
2720 gimple tstmt;
2721 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2722 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2723 tstmt = stmts->pop ();
2724 gcc_assert (tstmt == def_stmt);
2725 stmts->quick_push (stmt);
2726 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2727 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2728 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2729 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2730 return irhs1;
2731 }
2732 else
2733 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2734 goto and_ior_xor;
2735 }
2736 /* FALLTHRU */
2737 case BIT_IOR_EXPR:
2738 case BIT_XOR_EXPR:
2739 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2740 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2741 and_ior_xor:
2742 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2743 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2744 {
2745 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2746 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2747 int out_prec = TYPE_PRECISION (out_type);
2748 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2749 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2750 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2751 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2752 else
2753 {
2754 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2755 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2756 }
2757 }
2758 itype = TREE_TYPE (irhs1);
2759 pattern_stmt
2760 = gimple_build_assign_with_ops (rhs_code,
2761 vect_recog_temp_ssa_var (itype, NULL),
2762 irhs1, irhs2);
2763 break;
2764
2765 default:
2766 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2767 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2768 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2769 || (TYPE_PRECISION (TREE_TYPE (rhs1))
2770 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
2771 {
2772 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2773 itype
2774 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2775 }
2776 else
2777 itype = TREE_TYPE (rhs1);
2778 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2779 if (trueval == NULL_TREE)
2780 trueval = build_int_cst (itype, 1);
2781 else
2782 gcc_checking_assert (useless_type_conversion_p (itype,
2783 TREE_TYPE (trueval)));
2784 pattern_stmt
2785 = gimple_build_assign_with_ops (COND_EXPR,
2786 vect_recog_temp_ssa_var (itype, NULL),
2787 cond_expr, trueval,
2788 build_int_cst (itype, 0));
2789 break;
2790 }
2791
2792 stmts->safe_push (stmt);
2793 gimple_set_location (pattern_stmt, loc);
2794 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2795 return gimple_assign_lhs (pattern_stmt);
2796 }
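/* A worked instance of the precision balancing in and_ior_xor above
   (illustrative): for var = (char_x < char_y) | (int_x < int_y) with
   a 32-bit OUT_TYPE, irhs1 has 8-bit precision and irhs2 32-bit, so
   irhs1 is cast to the 32-bit type of irhs2 before the BIT_IOR_EXPR
   pattern stmt is built.  */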
2797
2798
2799 /* Function vect_recog_bool_pattern
2800
2801 Try to find pattern like following:
2802
2803 bool a_b, b_b, c_b, d_b, e_b;
2804 TYPE f_T;
2805 loop:
2806 S1 a_b = x1 CMP1 y1;
2807 S2 b_b = x2 CMP2 y2;
2808 S3 c_b = a_b & b_b;
2809 S4 d_b = x3 CMP3 y3;
2810 S5 e_b = c_b | d_b;
2811 S6 f_T = (TYPE) e_b;
2812
2813 where type 'TYPE' is an integral type.
2814
2815 Input:
2816
2817 * LAST_STMT: The stmt at the end of the chain, from which the
2818 pattern search begins, i.e. the cast of a bool to
2819 an integer type.
2820
2821 Output:
2822
2823 * TYPE_IN: The type of the input arguments to the pattern.
2824
2825 * TYPE_OUT: The type of the output of this pattern.
2826
2827 * Return value: A new stmt that will be used to replace the pattern.
2828
2829 Assuming the size of TYPE is the same as the size of all comparisons
2830 (otherwise some casts would be added where needed), for the above
2831 sequence we create the following related pattern stmts:
2832 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2833 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2834 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2835 S5' e_T = c_T | d_T;
2836 S6' f_T = e_T;
2837
2838 Instead of the above S3' we could emit:
2839 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2840 S3' c_T = a_T & b_T;
2841 but the above is more efficient. */
2842
2843 static gimple
2844 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
2845 tree *type_out)
2846 {
2847 gimple last_stmt = stmts->pop ();
2848 enum tree_code rhs_code;
2849 tree var, lhs, rhs, vectype;
2850 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2851 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2852 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2853 gimple pattern_stmt;
2854
2855 if (!is_gimple_assign (last_stmt))
2856 return NULL;
2857
2858 var = gimple_assign_rhs1 (last_stmt);
2859 lhs = gimple_assign_lhs (last_stmt);
2860
2861 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2862 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2863 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2864 return NULL;
2865
2866 rhs_code = gimple_assign_rhs_code (last_stmt);
2867 if (CONVERT_EXPR_CODE_P (rhs_code))
2868 {
2869 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2870 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2871 return NULL;
2872 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2873 if (vectype == NULL_TREE)
2874 return NULL;
2875
2876 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2877 return NULL;
2878
2879 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2880 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2881 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2882 pattern_stmt
2883 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2884 else
2885 pattern_stmt
2886 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2887 *type_out = vectype;
2888 *type_in = vectype;
2889 stmts->safe_push (last_stmt);
2890 if (dump_enabled_p ())
2891 dump_printf_loc (MSG_NOTE, vect_location,
2892 "vect_recog_bool_pattern: detected:\n");
2893
2894 return pattern_stmt;
2895 }
2896 else if (rhs_code == SSA_NAME
2897 && STMT_VINFO_DATA_REF (stmt_vinfo))
2898 {
2899 stmt_vec_info pattern_stmt_info;
2900 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2901 gcc_assert (vectype != NULL_TREE);
2902 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2903 return NULL;
2904 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2905 return NULL;
2906
2907 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2908 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2909 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2910 {
2911 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2912 gimple cast_stmt
2913 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2914 new_pattern_def_seq (stmt_vinfo, cast_stmt);
2915 rhs = rhs2;
2916 }
2917 pattern_stmt
2918 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2919 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2920 bb_vinfo);
2921 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2922 STMT_VINFO_DATA_REF (pattern_stmt_info)
2923 = STMT_VINFO_DATA_REF (stmt_vinfo);
2924 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2925 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2926 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2927 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2928 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2929 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2930 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2931 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2932 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2933 *type_out = vectype;
2934 *type_in = vectype;
2935 stmts->safe_push (last_stmt);
2936 if (dump_enabled_p ())
2937 dump_printf_loc (MSG_NOTE, vect_location,
2938 "vect_recog_bool_pattern: detected:\n");
2939 return pattern_stmt;
2940 }
2941 else
2942 return NULL;
2943 }
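/* Illustrative use of the second arm above (a bool store):

     _Bool A[N];
     A[i] = x < y;

   The bool rhs is expanded to an integer COND_EXPR in the vector
   element type, e.g. tmp = x < y ? 1 : 0, and the store is redirected
   to tmp, with the original lhs rewritten through a
   VIEW_CONVERT_EXPR.  */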
2944
2945
2946 /* Mark statements that are involved in a pattern. */
2947
2948 static inline void
2949 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2950 tree pattern_vectype)
2951 {
2952 stmt_vec_info pattern_stmt_info, def_stmt_info;
2953 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2954 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2955 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
2956 gimple def_stmt;
2957
2958 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2959 if (pattern_stmt_info == NULL)
2960 {
2961 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2962 bb_vinfo);
2963 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2964 }
2965 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2966
2967 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2968 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2969 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2970 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2971 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2972 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2973 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2974 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2975 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2976 {
2977 gimple_stmt_iterator si;
2978 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2979 !gsi_end_p (si); gsi_next (&si))
2980 {
2981 def_stmt = gsi_stmt (si);
2982 def_stmt_info = vinfo_for_stmt (def_stmt);
2983 if (def_stmt_info == NULL)
2984 {
2985 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
2986 bb_vinfo);
2987 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2988 }
2989 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2990 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2991 STMT_VINFO_DEF_TYPE (def_stmt_info)
2992 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2993 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2994 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2995 }
2996 }
2997 }
2998
2999 /* Function vect_pattern_recog_1
3000
3001 Input:
3002 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3003 computation pattern.
3004 STMT: A stmt from which the pattern search should start.
3005
3006 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3007 expression that computes the same functionality and can be used to
3008 replace the sequence of stmts that are involved in the pattern.
3009
3010 Output:
3011 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3012 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3013 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3014 indicates that target support has already been checked by PATTERN_RECOG_FUNC.
3015 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it matches
3016 the available target pattern.
3017
3018 This function also does some bookkeeping, as explained in the documentation
3019 for vect_pattern_recog. */
3020
3021 static void
3022 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3023 gimple_stmt_iterator si,
3024 vec<gimple> *stmts_to_replace)
3025 {
3026 gimple stmt = gsi_stmt (si), pattern_stmt;
3027 stmt_vec_info stmt_info;
3028 loop_vec_info loop_vinfo;
3029 tree pattern_vectype;
3030 tree type_in, type_out;
3031 enum tree_code code;
3032 int i;
3033 gimple next;
3034
3035 stmts_to_replace->truncate (0);
3036 stmts_to_replace->quick_push (stmt);
3037 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
3038 if (!pattern_stmt)
3039 return;
3040
3041 stmt = stmts_to_replace->last ();
3042 stmt_info = vinfo_for_stmt (stmt);
3043 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3044
3045 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3046 {
3047 /* No need to check target support (already checked by the pattern
3048 recognition function). */
3049 pattern_vectype = type_out ? type_out : type_in;
3050 }
3051 else
3052 {
3053 enum machine_mode vec_mode;
3054 enum insn_code icode;
3055 optab optab;
3056
3057 /* Check target support */
3058 type_in = get_vectype_for_scalar_type (type_in);
3059 if (!type_in)
3060 return;
3061 if (type_out)
3062 type_out = get_vectype_for_scalar_type (type_out);
3063 else
3064 type_out = type_in;
3065 if (!type_out)
3066 return;
3067 pattern_vectype = type_out;
3068
3069 if (is_gimple_assign (pattern_stmt))
3070 code = gimple_assign_rhs_code (pattern_stmt);
3071 else
3072 {
3073 gcc_assert (is_gimple_call (pattern_stmt));
3074 code = CALL_EXPR;
3075 }
3076
3077 optab = optab_for_tree_code (code, type_in, optab_default);
3078 vec_mode = TYPE_MODE (type_in);
3079 if (!optab
3080 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3081 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3082 return;
3083 }
3084
3085 /* Found a vectorizable pattern. */
3086 if (dump_enabled_p ())
3087 {
3088 dump_printf_loc (MSG_NOTE, vect_location,
3089 "pattern recognized: ");
3090 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3091 dump_printf (MSG_NOTE, "\n");
3092 }
3093
3094 /* Mark the stmts that are involved in the pattern. */
3095 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3096
3097 /* Patterns cannot be vectorized using SLP, because they change the order of
3098 computation. */
3099 if (loop_vinfo)
3100 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3101 if (next == stmt)
3102 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3103
3104 /* It is possible that additional pattern stmts are created and inserted in
3105 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3106 relevant statements. */
3107 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3108 && (unsigned) i < (stmts_to_replace->length () - 1);
3109 i++)
3110 {
3111 stmt_info = vinfo_for_stmt (stmt);
3112 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3113 if (dump_enabled_p ())
3114 {
3115 dump_printf_loc (MSG_NOTE, vect_location,
3116 "additional pattern stmt: ");
3117 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3118 dump_printf (MSG_NOTE, "\n");
3119 }
3120
3121 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3122 }
3123 }
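/* E.g. if a recognizer returns a pattern stmt together with scalar
   TYPE_IN 'short', the code above maps 'short' to the target's short
   vector type, looks up the optab for the pattern stmt's code and
   accepts the pattern only if that optab has a handler whose result
   mode matches TYPE_OUT's vector mode.  */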
3124
3125
3126 /* Function vect_pattern_recog
3127
3128 Input:
3129 LOOP_VINFO - the loop_vec_info struct of a loop in which we want to look for
3130 computation idioms.
3131
3132 Output - for each computation idiom that is detected we create a new stmt
3133 that provides the same functionality and that can be vectorized. We
3134 also record some information in the stmt_vec_info struct of the relevant
3135 stmts, as explained below:
3136
3137 At the entry to this function we have the following stmts, with the
3138 following initial value in the STMT_VINFO fields:
3139
3140 stmt in_pattern_p related_stmt vec_stmt
3141 S1: a_i = .... - - -
3142 S2: a_2 = ..use(a_i).. - - -
3143 S3: a_1 = ..use(a_2).. - - -
3144 S4: a_0 = ..use(a_1).. - - -
3145 S5: ... = ..use(a_0).. - - -
3146
3147 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3148 represented by a single stmt. We then:
3149 - create a new stmt S6 equivalent to the pattern (the stmt is not
3150 inserted into the code)
3151 - fill in the STMT_VINFO fields as follows:
3152
3153 in_pattern_p related_stmt vec_stmt
3154 S1: a_i = .... - - -
3155 S2: a_2 = ..use(a_i).. - - -
3156 S3: a_1 = ..use(a_2).. - - -
3157 S4: a_0 = ..use(a_1).. true S6 -
3158 '---> S6: a_new = .... - S4 -
3159 S5: ... = ..use(a_0).. - - -
3160
3161 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3162 to each other through the RELATED_STMT field).
3163
3164 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3165 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3166 remain irrelevant unless used by stmts other than S4.
3167
3168 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3169 (because they are marked as irrelevant). It will vectorize S6, and record
3170 a pointer to the new vector stmt VS6 from S6 (as usual).
3171 S4 will be skipped, and S5 will be vectorized as usual:
3172
3173 in_pattern_p related_stmt vec_stmt
3174 S1: a_i = .... - - -
3175 S2: a_2 = ..use(a_i).. - - -
3176 S3: a_1 = ..use(a_2).. - - -
3177 > VS6: va_new = .... - - -
3178 S4: a_0 = ..use(a_1).. true S6 VS6
3179 '---> S6: a_new = .... - S4 VS6
3180 > VS5: ... = ..vuse(va_new).. - - -
3181 S5: ... = ..use(a_0).. - - -
3182
3183 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3184 elsewhere), and we'll end up with:
3185
3186 VS6: va_new = ....
3187 VS5: ... = ..vuse(va_new)..
3188
3189 In case of more than one pattern statements, e.g., widen-mult with
3190 intermediate type:
3191
3192 S1 a_t = ;
3193 S2 a_T = (TYPE) a_t;
3194 '--> S3: a_it = (interm_type) a_t;
3195 S4 prod_T = a_T * CONST;
3196 '--> S5: prod_T' = a_it w* CONST;
3197
3198 there may be other users of a_T outside the pattern. In that case S2 will
3199 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3200 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3201 be recorded in S3. */
3202
3203 void
3204 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3205 {
3206 struct loop *loop;
3207 basic_block *bbs;
3208 unsigned int nbbs;
3209 gimple_stmt_iterator si;
3210 unsigned int i, j;
3211 vect_recog_func_ptr vect_recog_func;
3212 stack_vec<gimple, 1> stmts_to_replace;
3213 gimple stmt;
3214
3215 if (dump_enabled_p ())
3216 dump_printf_loc (MSG_NOTE, vect_location,
3217 "=== vect_pattern_recog ===\n");
3218
3219 if (loop_vinfo)
3220 {
3221 loop = LOOP_VINFO_LOOP (loop_vinfo);
3222 bbs = LOOP_VINFO_BBS (loop_vinfo);
3223 nbbs = loop->num_nodes;
3224 }
3225 else
3226 {
3227 bbs = &BB_VINFO_BB (bb_vinfo);
3228 nbbs = 1;
3229 }
3230
3231 /* Scan through the loop stmts, applying the pattern recognition
3232 functions starting at each stmt visited. */
3233 for (i = 0; i < nbbs; i++)
3234 {
3235 basic_block bb = bbs[i];
3236 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3237 {
3238 if (bb_vinfo && (stmt = gsi_stmt (si))
3239 && vinfo_for_stmt (stmt)
3240 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3241 continue;
3242
3243 /* Scan over all generic vect_recog_xxx_pattern functions. */
3244 for (j = 0; j < NUM_PATTERNS; j++)
3245 {
3246 vect_recog_func = vect_vect_recog_func_ptrs[j];
3247 vect_pattern_recog_1 (vect_recog_func, si,
3248 &stmts_to_replace);
3249 }
3250 }
3251 }
3252 }