/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2013 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "dumpfile.h"

/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
                                            tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
                                             tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
                                           tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
                                                tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
                                              tree *, tree *);
static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
                                                      tree *, tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *,
                                         tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
                                                  tree *, tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
        vect_recog_widen_mult_pattern,
        vect_recog_widen_sum_pattern,
        vect_recog_dot_prod_pattern,
        vect_recog_pow_pattern,
        vect_recog_widen_shift_pattern,
        vect_recog_over_widening_pattern,
        vect_recog_rotate_pattern,
        vect_recog_vector_vector_shift_pattern,
        vect_recog_divmod_pattern,
        vect_recog_mixed_size_cond_pattern,
        vect_recog_bool_pattern};

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
                                      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}

/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
        return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
          || gimple_code (stmt2) == GIMPLE_PHI)
        return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}

/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}

/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion or demotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
                   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
                           &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
    *promotion = false;
  else
    return false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
                           bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
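
/* Illustration (not from the original sources): with 16-bit inputs
   accumulated into a 32-bit sum, a hypothetical source loop of the
   following shape is the kind of computation this pattern matches:

     short x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += x[i] * y[i];

   Here 'type' is short and 'TYPE1'/'TYPE2' are int (the promotions S3/S4
   are implicit in C), so each vector iteration can become a single
   DOT_PROD_EXPR.  */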

static gimple
vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
                             tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
          || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
                             &promotion)
          && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  We cannot continue analyzing the def-use chain when this stmt
     is a phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
                              &promotion)
          || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
                              &promotion)
          || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
                                               oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}


/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
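
/* Illustration (not from the original sources): with an 8-bit a_t,
   a 32-bit TYPE and CONST = 3, HALF_TYPE is a quarter of TYPE and 3 fits
   the 16-bit intermediate type, so the rewrite described above turns

     a_T = (int) a_t;  prod_T = a_T * 3;
   into
     a_it = (short) a_t;  prod_T' = a_it w* 3;

   i.e. a 16-bit widening multiply that still produces the 32-bit result.  */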

static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
                               tree const_oprnd, tree *oprnd,
                               vec<gimple> *stmts, tree type,
                               tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;
  gimple new_stmt;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
           && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
              != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      stmts->safe_push (def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      new_oprnd = make_ssa_name (new_type, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
                                               NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      stmts->safe_push (def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}


/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type a_t, b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t, b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
*/
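
/* Illustration (not from the original sources): a hypothetical loop
   matching the basic form above:

     short a[N], b[N];
     int prod[N];
     for (int i = 0; i < N; i++)
       prod[i] = a[i] * b[i];

   Each multiply is 16-bit x 16-bit -> 32-bit, so the promotion/multiply
   pair can be replaced by a single WIDEN_MULT_EXPR.  */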

static gimple
vect_recog_widen_mult_pattern (vec<gimple> *stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
                              &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
                                            &oprnd0, stmts, type,
                                            &half_type0, def_stmt0))
        half_type1 = half_type0;
      else
        return NULL;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
          || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_mult_pattern: detected: ");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy_code, &dummy_code,
                                          &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
                                               oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}


/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)
*/
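
/* Illustration (not from the original sources): both replacements in one
   hypothetical loop (names are illustrative):

     double x[N], y[N], z[N];
     for (int i = 0; i < N; i++)
       {
         y[i] = pow (x[i], 2.0);   -->  y[i] = x[i] * x[i];
         z[i] = pow (x[i], 0.5);   -->  z[i] = sqrt (x[i]);
       }

   The sqrt form is only emitted when the target can vectorize sqrt.  */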

static gimple
vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
                        tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
          && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((host_integerp (exp, 0)
       && tree_low_cst (exp, 0) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
        {
          gimple stmt = gimple_build_call (newfn, 1, base);
          if (vectorizable_function (stmt, *type_in, *type_in)
              != NULL_TREE)
            {
              var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
              gimple_call_set_lhs (stmt, var);
              return stmt;
            }
        }
    }

  return NULL;
}


/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e., we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This is
   a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
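
/* Illustration (not from the original sources): a hypothetical loop that
   the widening-sum pattern matches:

     short x[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += x[i];

   Each 16-bit element is widened and added into the 32-bit accumulator,
   which becomes WIDEN_SUM <x[i], sum>.  */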

static gimple
vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
                              tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
                          &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
                                               oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}


/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be created
         in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
         in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */

static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  vec<gimple> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
                              &promotion)
          || !promotion
          || !vect_same_loop_or_bb_p (stmt, def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
        {
          /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
          if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
            return false;

          interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
          if (!int_fits_type_p (const_oprnd, interm_type))
            return false;
        }

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
        return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
         (e.g., if the original value was char, the shift amount is at most 8
         if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
        return false;

      interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
        return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
        break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
        return false;

      interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
        return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
           STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

              stmts->safe_push (def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              new_oprnd = make_ssa_name (interm_type, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              stmts->safe_push (def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          new_oprnd = make_ssa_name (interm_type, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}


/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
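
/* Illustration (not from the original sources): because of C integer
   promotion, this hypothetical loop computes in int even though much less
   precision suffices:

     unsigned char a[N], b[N];
     for (int i = 0; i < N; i++)
       b[i] = (a[i] << 2) | 3;

   The implicit char->int promotion, the shift, the OR, and the int->char
   demotion on the store correspond to S2-S5 above; the whole sequence can
   be performed in a narrower type.  */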

static gimple
vect_recog_over_widening_pattern (vec<gimple> *stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = stmts->pop ();
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         a case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
        = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
                                        op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "created pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
          || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
        return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          new_oprnd = make_ssa_name (use_type, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            stmts->safe_push (prev_stmt);
        }
      else
        {
          if (prev_stmt)
            STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
              = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  return pattern_stmt;
}

/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
    in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
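
/* Illustration (not from the original sources): a hypothetical loop
   matching the basic widening-shift form:

     unsigned short in[N];
     unsigned int out[N];
     for (int i = 0; i < N; i++)
       out[i] = (unsigned int) in[i] << 7;

   The promotion plus shift pair becomes WIDEN_LSHIFT_EXPR <in[i], 7>.  */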

static gimple
vect_recog_widen_shift_pattern (vec<gimple> *stmts,
                                tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: it has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
          && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
        {
          last_stmt = use_stmt;
          type = use_type;
        }
    }

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
                                      &oprnd0, stmts,
                                      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_shift_pattern: detected: ");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy_code, &dummy_code,
                                          &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Detect a rotate pattern that wouldn't be otherwise vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    with a sequence:

    S1 d_t = -c_t;
    S2 e_t = d_t & (B - 1);
    S3 f_t = b_t << c_t;
    S4 g_t = b_t >> e_t;
    S0 a_t = f_t | g_t;

    where B is element bitsize of type.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */
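
/* Illustration (not from the original sources): rotates typically reach
   the vectorizer via idiom recognition of a shift pair, e.g. this
   hypothetical byte rotation:

     unsigned char x[N];
     for (int i = 0; i < N; i++)
       x[i] = (x[i] << 3) | (x[i] >> 5);

   Earlier folding can turn that into x[i] r<< 3 (S0 above); on targets
   without vector rotates, the code below lowers it back to the masked
   shift/or sequence S1-S4, which is vectorizable.  */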

static gimple
vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  optab optab1, optab2;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
                           &def, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (bb_vinfo != NULL || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
          && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
        return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (bb_vinfo == NULL && dt == vect_internal_def)
        return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
          || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
          || !optab2
          || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
        return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
          && TYPE_PRECISION (TREE_TYPE (rhs1))
             == TYPE_PRECISION (type))
        def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
                                               NULL_TREE);
      append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!host_integerp (def, 1)
          || (unsigned HOST_WIDE_INT) tree_low_cst (def, 1)
             >= GET_MODE_PRECISION (TYPE_MODE (type))
          || integer_zerop (def))
        return NULL;
      def2 = build_int_cst (stype,
                            GET_MODE_PRECISION (TYPE_MODE (type))
                            - tree_low_cst (def, 1));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (stype);
      stmt_vec_info def_stmt_vinfo;

      if (vecstype == NULL_TREE)
        return NULL;
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
                                               NULL_TREE);
      def_stmt_vinfo
        = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      def2 = vect_recog_temp_ssa_var (stype, NULL);
      tree mask
        = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
      def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
                                               gimple_assign_lhs (def_stmt),
                                               mask);
      def_stmt_vinfo
        = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
      append_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  var1 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
                                           ? LSHIFT_EXPR : RSHIFT_EXPR,
                                           var1, oprnd0, def);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  var2 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
                                           ? RSHIFT_EXPR : LSHIFT_EXPR,
                                           var2, oprnd0, def2);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_rotate_pattern: detected: ");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with a different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

   type a_t;
   TYPE b_T, c_T, res_T;

   S0 c_T = ;
   S1 a_t = (type) c_T;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has same type on both operands, in the
    second case just b_T op c_T, in the first case with added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */
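
/* Illustration (not from the original sources): the kind of loop that can
   give rise to S1-S3 above, with per-element shift amounts stored in a
   narrower array than the shifted values (names are hypothetical):

     int b[N];
     unsigned char s[N];
     for (int i = 0; i < N; i++)
       b[i] = b[i] << s[i];

   The pattern re-expresses the shift so that both operands have the same
   type, matching the target's vector-by-vector shift instructions.  */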
1707
1708 static gimple
1709 vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
1710 tree *type_in, tree *type_out)
1711 {
1712 gimple last_stmt = stmts->pop ();
1713 tree oprnd0, oprnd1, lhs, var;
1714 gimple pattern_stmt, def_stmt;
1715 enum tree_code rhs_code;
1716 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1717 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1718 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1719 enum vect_def_type dt;
1720 tree def;
1721
1722 if (!is_gimple_assign (last_stmt))
1723 return NULL;
1724
1725 rhs_code = gimple_assign_rhs_code (last_stmt);
1726 switch (rhs_code)
1727 {
1728 case LSHIFT_EXPR:
1729 case RSHIFT_EXPR:
1730 case LROTATE_EXPR:
1731 case RROTATE_EXPR:
1732 break;
1733 default:
1734 return NULL;
1735 }
1736
1737 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1738 return NULL;
1739
1740 lhs = gimple_assign_lhs (last_stmt);
1741 oprnd0 = gimple_assign_rhs1 (last_stmt);
1742 oprnd1 = gimple_assign_rhs2 (last_stmt);
1743 if (TREE_CODE (oprnd0) != SSA_NAME
1744 || TREE_CODE (oprnd1) != SSA_NAME
1745 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
1746 || TYPE_PRECISION (TREE_TYPE (oprnd1))
1747 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
1748 || TYPE_PRECISION (TREE_TYPE (lhs))
1749 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
1750 return NULL;
1751
1752 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1753 &def, &dt))
1754 return NULL;
1755
1756 if (dt != vect_internal_def)
1757 return NULL;
1758
1759 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
1760 *type_out = *type_in;
1761 if (*type_in == NULL_TREE)
1762 return NULL;
1763
1764 def = NULL_TREE;
1765 if (gimple_assign_cast_p (def_stmt))
1766 {
1767 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1768 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
1769 && TYPE_PRECISION (TREE_TYPE (rhs1))
1770 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
1771 def = rhs1;
1772 }
1773
1774 if (def == NULL_TREE)
1775 {
1776 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1777 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1778 NULL_TREE);
1779 new_pattern_def_seq (stmt_vinfo, def_stmt);
1780 }
1781
1782 /* Pattern detected. */
1783 if (dump_enabled_p ())
1784 dump_printf_loc (MSG_NOTE, vect_location,
1785 "vect_recog_vector_vector_shift_pattern: detected: ");
1786
1787 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1788 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1789 pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
1790
1791 if (dump_enabled_p ())
1792 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1793
1794 stmts->safe_push (last_stmt);
1795 return pattern_stmt;
1796 }
1797
1798 /* Detect a signed division by a constant that wouldn't otherwise
1799 be vectorized:
1800
1801 type a_t, b_t;
1802
1803 S1 a_t = b_t / N;
1804
1805 where type 'type' is an integral type and N is a constant.
1806
1807 Similarly handle modulo by a constant:
1808
1809 S4 a_t = b_t % N;
1810
1811 Input/Output:
1812
1813 * STMTS: Contains a stmt from which the pattern search begins,
1814 i.e. the division stmt. S1 is replaced, if N is a power-of-two
1815 constant and type is signed, by:
1816 S3 y_t = b_t < 0 ? N - 1 : 0;
1817 S2 x_t = b_t + y_t;
1818 S1' a_t = x_t >> log2 (N);
1819
1820 S4 is replaced, if N is a power-of-two constant and type is
1821 signed, by (where *_T temporaries have unsigned type):
1822 S9 y_T = b_t < 0 ? -1U : 0U;
1823 S8 z_T = y_T >> (sizeof (type) * CHAR_BIT - log2 (N));
1824 S7 z_t = (type) z_T;
1825 S6 w_t = b_t + z_t;
1826 S5 x_t = w_t & (N - 1);
1827 S4' a_t = x_t - z_t;
1828
1829 Output:
1830
1831 * TYPE_IN: The type of the input arguments to the pattern.
1832
1833 * TYPE_OUT: The type of the output of this pattern.
1834
1835 * Return value: A new stmt that will be used to replace the division
1836 S1 or modulo S4 stmt. */
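
/* A worked example (illustrative values, not from the original sources):
   with type = int and N = 4, S1  a_t = b_t / 4 becomes

     S3  y_t = b_t < 0 ? 3 : 0;
     S2  x_t = b_t + y_t;
     S1' a_t = x_t >> 2;

   so b_t = -7 gives y_t = 3, x_t = -4 and a_t = -1, matching truncating
   division.  */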
1837
1838 static gimple
1839 vect_recog_divmod_pattern (vec<gimple> *stmts,
1840 tree *type_in, tree *type_out)
1841 {
1842 gimple last_stmt = stmts->pop ();
1843 tree oprnd0, oprnd1, vectype, itype, cond;
1844 gimple pattern_stmt, def_stmt;
1845 enum tree_code rhs_code;
1846 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1847 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1848 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1849 optab optab;
1850 tree q;
1851 int dummy_int, prec;
1852 stmt_vec_info def_stmt_vinfo;
1853
1854 if (!is_gimple_assign (last_stmt))
1855 return NULL;
1856
1857 rhs_code = gimple_assign_rhs_code (last_stmt);
1858 switch (rhs_code)
1859 {
1860 case TRUNC_DIV_EXPR:
1861 case TRUNC_MOD_EXPR:
1862 break;
1863 default:
1864 return NULL;
1865 }
1866
1867 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1868 return NULL;
1869
1870 oprnd0 = gimple_assign_rhs1 (last_stmt);
1871 oprnd1 = gimple_assign_rhs2 (last_stmt);
1872 itype = TREE_TYPE (oprnd0);
1873 if (TREE_CODE (oprnd0) != SSA_NAME
1874 || TREE_CODE (oprnd1) != INTEGER_CST
1875 || TREE_CODE (itype) != INTEGER_TYPE
1876 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
1877 return NULL;
1878
1879 vectype = get_vectype_for_scalar_type (itype);
1880 if (vectype == NULL_TREE)
1881 return NULL;
1882
1883 /* If the target can handle vectorized division or modulo natively,
1884 don't attempt to optimize this. */
1885 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
1886 if (optab != unknown_optab)
1887 {
1888 enum machine_mode vec_mode = TYPE_MODE (vectype);
1889 int icode = (int) optab_handler (optab, vec_mode);
1890 if (icode != CODE_FOR_nothing)
1891 return NULL;
1892 }
1893
1894 prec = TYPE_PRECISION (itype);
1895 if (integer_pow2p (oprnd1))
1896 {
1897 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
1898 return NULL;
1899
1900 /* Pattern detected. */
1901 if (dump_enabled_p ())
1902 dump_printf_loc (MSG_NOTE, vect_location,
1903 "vect_recog_divmod_pattern: detected: ");
1904
1905 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
1906 build_int_cst (itype, 0));
1907 if (rhs_code == TRUNC_DIV_EXPR)
1908 {
1909 tree var = vect_recog_temp_ssa_var (itype, NULL);
1910 tree shift;
1911 def_stmt
1912 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
1913 fold_build2 (MINUS_EXPR, itype,
1914 oprnd1,
1915 build_int_cst (itype,
1916 1)),
1917 build_int_cst (itype, 0));
1918 new_pattern_def_seq (stmt_vinfo, def_stmt);
1919 var = vect_recog_temp_ssa_var (itype, NULL);
1920 def_stmt
1921 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
1922 gimple_assign_lhs (def_stmt));
1923 append_pattern_def_seq (stmt_vinfo, def_stmt);
1924
1925 shift = build_int_cst (itype, tree_log2 (oprnd1));
1926 pattern_stmt
1927 = gimple_build_assign_with_ops (RSHIFT_EXPR,
1928 vect_recog_temp_ssa_var (itype,
1929 NULL),
1930 var, shift);
1931 }
1932 else
1933 {
1934 tree signmask;
1935 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1936 if (compare_tree_int (oprnd1, 2) == 0)
1937 {
1938 signmask = vect_recog_temp_ssa_var (itype, NULL);
1939 def_stmt
1940 = gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
1941 build_int_cst (itype, 1),
1942 build_int_cst (itype, 0));
1943 append_pattern_def_seq (stmt_vinfo, def_stmt);
1944 }
1945 else
1946 {
1947 tree utype
1948 = build_nonstandard_integer_type (prec, 1);
1949 tree vecutype = get_vectype_for_scalar_type (utype);
1950 tree shift
1951 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
1952 - tree_log2 (oprnd1));
1953 tree var = vect_recog_temp_ssa_var (utype, NULL);
1954
1955 def_stmt
1956 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
1957 build_int_cst (utype, -1),
1958 build_int_cst (utype, 0));
1959 def_stmt_vinfo
1960 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1961 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1962 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
1963 append_pattern_def_seq (stmt_vinfo, def_stmt);
1964 var = vect_recog_temp_ssa_var (utype, NULL);
1965 def_stmt
1966 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
1967 gimple_assign_lhs (def_stmt),
1968 shift);
1969 def_stmt_vinfo
1970 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1971 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1972 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
1973 append_pattern_def_seq (stmt_vinfo, def_stmt);
1974 signmask = vect_recog_temp_ssa_var (itype, NULL);
1975 def_stmt
1976 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
1977 NULL_TREE);
1978 append_pattern_def_seq (stmt_vinfo, def_stmt);
1979 }
1980 def_stmt
1981 = gimple_build_assign_with_ops (PLUS_EXPR,
1982 vect_recog_temp_ssa_var (itype,
1983 NULL),
1984 oprnd0, signmask);
1985 append_pattern_def_seq (stmt_vinfo, def_stmt);
1986 def_stmt
1987 = gimple_build_assign_with_ops (BIT_AND_EXPR,
1988 vect_recog_temp_ssa_var (itype,
1989 NULL),
1990 gimple_assign_lhs (def_stmt),
1991 fold_build2 (MINUS_EXPR, itype,
1992 oprnd1,
1993 build_int_cst (itype,
1994 1)));
1995 append_pattern_def_seq (stmt_vinfo, def_stmt);
1996
1997 pattern_stmt
1998 = gimple_build_assign_with_ops (MINUS_EXPR,
1999 vect_recog_temp_ssa_var (itype,
2000 NULL),
2001 gimple_assign_lhs (def_stmt),
2002 signmask);
2003 }
2004
2005 if (dump_enabled_p ())
2006 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2007 0);
2008
2009 stmts->safe_push (last_stmt);
2010
2011 *type_in = vectype;
2012 *type_out = vectype;
2013 return pattern_stmt;
2014 }
2015
2016 if (!host_integerp (oprnd1, TYPE_UNSIGNED (itype))
2017 || integer_zerop (oprnd1)
2018 || prec > HOST_BITS_PER_WIDE_INT)
2019 return NULL;
2020
2021 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2022 return NULL;
2023
2024 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2025
2026 if (TYPE_UNSIGNED (itype))
2027 {
2028 unsigned HOST_WIDE_INT mh, ml;
2029 int pre_shift, post_shift;
2030 unsigned HOST_WIDE_INT d = tree_low_cst (oprnd1, 1)
2031 & GET_MODE_MASK (TYPE_MODE (itype));
2032 tree t1, t2, t3, t4;
2033
2034 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2035 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2036 return NULL;
2037
2038 /* Find a suitable multiplier and right shift count
2039 instead of multiplying with D. */
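      /* E.g. (illustrative) for prec = 32 and d = 10 this yields
         ml = 0xCCCCCCCD with post_shift = 3 and mh = 0, i.e.
         q = (oprnd0 h* 0xCCCCCCCD) >> 3.  */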
2040 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2041
2042 /* If the suggested multiplier is more than PREC bits wide, we can
2043 do better for even divisors, using an initial right shift. */
2044 if (mh != 0 && (d & 1) == 0)
2045 {
2046 pre_shift = floor_log2 (d & -d);
2047 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2048 &ml, &post_shift, &dummy_int);
2049 gcc_assert (!mh);
2050 }
2051 else
2052 pre_shift = 0;
2053
2054 if (mh != 0)
2055 {
2056 if (post_shift - 1 >= prec)
2057 return NULL;
2058
2059 /* t1 = oprnd0 h* ml;
2060 t2 = oprnd0 - t1;
2061 t3 = t2 >> 1;
2062 t4 = t1 + t3;
2063 q = t4 >> (post_shift - 1); */
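      /* E.g. (illustrative) for prec = 32 and d = 7, choose_multiplier
         gives ml = 0x24924925 with post_shift = 3, so
         q = (t1 + ((oprnd0 - t1) >> 1)) >> 2.  */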
2064 t1 = vect_recog_temp_ssa_var (itype, NULL);
2065 def_stmt
2066 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2067 build_int_cst (itype, ml));
2068 append_pattern_def_seq (stmt_vinfo, def_stmt);
2069
2070 t2 = vect_recog_temp_ssa_var (itype, NULL);
2071 def_stmt
2072 = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
2073 append_pattern_def_seq (stmt_vinfo, def_stmt);
2074
2075 t3 = vect_recog_temp_ssa_var (itype, NULL);
2076 def_stmt
2077 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2078 integer_one_node);
2079 append_pattern_def_seq (stmt_vinfo, def_stmt);
2080
2081 t4 = vect_recog_temp_ssa_var (itype, NULL);
2082 def_stmt
2083 = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
2084
2085 if (post_shift != 1)
2086 {
2087 append_pattern_def_seq (stmt_vinfo, def_stmt);
2088
2089 q = vect_recog_temp_ssa_var (itype, NULL);
2090 pattern_stmt
2091 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
2092 build_int_cst (itype,
2093 post_shift
2094 - 1));
2095 }
2096 else
2097 {
2098 q = t4;
2099 pattern_stmt = def_stmt;
2100 }
2101 }
2102 else
2103 {
2104 if (pre_shift >= prec || post_shift >= prec)
2105 return NULL;
2106
2107 /* t1 = oprnd0 >> pre_shift;
2108 t2 = t1 h* ml;
2109 q = t2 >> post_shift; */
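          /* E.g. (illustrative) for d = 12: pre_shift = 2, and the
             remaining division by 3 is done by the highpart multiply
             below.  */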
2110 if (pre_shift)
2111 {
2112 t1 = vect_recog_temp_ssa_var (itype, NULL);
2113 def_stmt
2114 = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
2115 build_int_cst (NULL,
2116 pre_shift));
2117 append_pattern_def_seq (stmt_vinfo, def_stmt);
2118 }
2119 else
2120 t1 = oprnd0;
2121
2122 t2 = vect_recog_temp_ssa_var (itype, NULL);
2123 def_stmt
2124 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
2125 build_int_cst (itype, ml));
2126
2127 if (post_shift)
2128 {
2129 append_pattern_def_seq (stmt_vinfo, def_stmt);
2130
2131 q = vect_recog_temp_ssa_var (itype, NULL);
2132 def_stmt
2133 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
2134 build_int_cst (itype,
2135 post_shift));
2136 }
2137 else
2138 q = t2;
2139
2140 pattern_stmt = def_stmt;
2141 }
2142 }
2143 else
2144 {
2145 unsigned HOST_WIDE_INT ml;
2146 int post_shift;
2147 HOST_WIDE_INT d = tree_low_cst (oprnd1, 0);
2148 unsigned HOST_WIDE_INT abs_d;
2149 bool add = false;
2150 tree t1, t2, t3, t4;
2151
2152 /* Give up for -1. */
2153 if (d == -1)
2154 return NULL;
2155
2156 /* Since d might be INT_MIN, we have to cast to
2157 unsigned HOST_WIDE_INT before negating to avoid
2158 undefined signed overflow. */
2159 abs_d = (d >= 0
2160 ? (unsigned HOST_WIDE_INT) d
2161 : - (unsigned HOST_WIDE_INT) d);
2162
2163 /* n rem d = n rem -d */
2164 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2165 {
2166 d = abs_d;
2167 oprnd1 = build_int_cst (itype, abs_d);
2168 }
2169 else if (HOST_BITS_PER_WIDE_INT >= prec
2170 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2171 /* This case is not handled correctly below. */
2172 return NULL;
2173
2174 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
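      /* E.g. (illustrative) for prec = 32 and d = 3 this yields
         ml = 0x55555556 with post_shift = 0, so that
         q = (oprnd0 h* 0x55555556) - (oprnd0 >> 31).  */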
2175 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2176 {
2177 add = true;
2178 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2179 }
2180 if (post_shift >= prec)
2181 return NULL;
2182
2183 /* t1 = oprnd0 h* ml; */
2184 t1 = vect_recog_temp_ssa_var (itype, NULL);
2185 def_stmt
2186 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2187 build_int_cst (itype, ml));
2188 append_pattern_def_seq (stmt_vinfo, def_stmt);
2189
2190 if (add)
2191 {
2192 /* t2 = t1 + oprnd0; */
2193 t2 = vect_recog_temp_ssa_var (itype, NULL);
2194 def_stmt
2195 = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
2196 append_pattern_def_seq (stmt_vinfo, def_stmt);
2197 }
2198 else
2199 t2 = t1;
2200
2201 if (post_shift)
2202 {
2203 /* t3 = t2 >> post_shift; */
2204 t3 = vect_recog_temp_ssa_var (itype, NULL);
2205 def_stmt
2206 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2207 build_int_cst (itype, post_shift));
2208 append_pattern_def_seq (stmt_vinfo, def_stmt);
2209 }
2210 else
2211 t3 = t2;
2212
2213 /* t4 = oprnd0 >> (prec - 1); */
2214 t4 = vect_recog_temp_ssa_var (itype, NULL);
2215 def_stmt
2216 = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
2217 build_int_cst (itype, prec - 1));
2218 append_pattern_def_seq (stmt_vinfo, def_stmt);
2219
2220 /* q = t3 - t4; or q = t4 - t3; */
2221 q = vect_recog_temp_ssa_var (itype, NULL);
2222 pattern_stmt
2223 = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2224 d < 0 ? t3 : t4);
2225 }
2226
2227 if (rhs_code == TRUNC_MOD_EXPR)
2228 {
2229 tree r, t1;
2230
2231 /* We divided. Now finish by:
2232 t1 = q * oprnd1;
2233 r = oprnd0 - t1; */
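      /* E.g. (illustrative) for oprnd0 = 27 and oprnd1 = 10:
         q = 2, t1 = 20, r = 7.  */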
2234 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2235
2236 t1 = vect_recog_temp_ssa_var (itype, NULL);
2237 def_stmt
2238 = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
2239 append_pattern_def_seq (stmt_vinfo, def_stmt);
2240
2241 r = vect_recog_temp_ssa_var (itype, NULL);
2242 pattern_stmt
2243 = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
2244 }
2245
2246 /* Pattern detected. */
2247 if (dump_enabled_p ())
2248 {
2249 dump_printf_loc (MSG_NOTE, vect_location,
2250 "vect_recog_divmod_pattern: detected: ");
2251 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2252 }
2253
2254 stmts->safe_push (last_stmt);
2255
2256 *type_in = vectype;
2257 *type_out = vectype;
2258 return pattern_stmt;
2259 }
2260
2261 /* Function vect_recog_mixed_size_cond_pattern
2262
2263 Try to find the following pattern:
2264
2265 type x_t, y_t;
2266 TYPE a_T, b_T, c_T;
2267 loop:
2268 S1 a_T = x_t CMP y_t ? b_T : c_T;
2269
2270 where 'TYPE' is an integral type whose size differs from that of
2271 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2272 than 'type', the constants need to fit into an integer type
2273 with the same width as 'type') or results of conversion from 'type'.
2274
2275 Input:
2276
2277 * LAST_STMT: A stmt from which the pattern search begins.
2278
2279 Output:
2280
2281 * TYPE_IN: The type of the input arguments to the pattern.
2282
2283 * TYPE_OUT: The type of the output of this pattern.
2284
2285 * Return value: A new stmt that will be used to replace the pattern.
2286 Additionally a def_stmt is added to the pattern def sequence:
2287
2288 a_it = x_t CMP y_t ? b_it : c_it;
2289 a_T = (TYPE) a_it; */
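
/* An illustrative source loop (hypothetical, not from the GCC testsuite)
   that matches this pattern:

     short x[N];
     long long r[N];
     for (i = 0; i < N; i++)
       r[i] = x[i] < 0 ? -1 : 1;

   The comparison is done on shorts while the result is a long long, so
   the COND_EXPR is evaluated in a short-sized integer type and the
   result widened afterwards.  */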
2290
2291 static gimple
2292 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2293 tree *type_out)
2294 {
2295 gimple last_stmt = (*stmts)[0];
2296 tree cond_expr, then_clause, else_clause;
2297 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2298 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2299 enum machine_mode cmpmode;
2300 gimple pattern_stmt, def_stmt;
2301 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2302 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2303 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2304 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2305 bool promotion;
2306 tree comp_scalar_type;
2307
2308 if (!is_gimple_assign (last_stmt)
2309 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2310 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2311 return NULL;
2312
2313 cond_expr = gimple_assign_rhs1 (last_stmt);
2314 then_clause = gimple_assign_rhs2 (last_stmt);
2315 else_clause = gimple_assign_rhs3 (last_stmt);
2316
2317 if (!COMPARISON_CLASS_P (cond_expr))
2318 return NULL;
2319
2320 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2321 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2322 if (comp_vectype == NULL_TREE)
2323 return NULL;
2324
2325 type = gimple_expr_type (last_stmt);
2326 if (types_compatible_p (type, comp_scalar_type)
2327 || ((TREE_CODE (then_clause) != INTEGER_CST
2328 || TREE_CODE (else_clause) != INTEGER_CST)
2329 && !INTEGRAL_TYPE_P (comp_scalar_type))
2330 || !INTEGRAL_TYPE_P (type))
2331 return NULL;
2332
2333 if ((TREE_CODE (then_clause) != INTEGER_CST
2334 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2335 &def_stmt0, &promotion))
2336 || (TREE_CODE (else_clause) != INTEGER_CST
2337 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2338 &def_stmt1, &promotion)))
2339 return NULL;
2340
2341 if (orig_type0 && orig_type1
2342 && !types_compatible_p (orig_type0, orig_type1))
2343 return NULL;
2344
2345 if (orig_type0)
2346 {
2347 if (!types_compatible_p (orig_type0, comp_scalar_type))
2348 return NULL;
2349 then_clause = gimple_assign_rhs1 (def_stmt0);
2350 itype = orig_type0;
2351 }
2352
2353 if (orig_type1)
2354 {
2355 if (!types_compatible_p (orig_type1, comp_scalar_type))
2356 return NULL;
2357 else_clause = gimple_assign_rhs1 (def_stmt1);
2358 itype = orig_type1;
2359 }
2360
2361 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2362
2363 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2364 return NULL;
2365
2366 vectype = get_vectype_for_scalar_type (type);
2367 if (vectype == NULL_TREE)
2368 return NULL;
2369
2370 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2371 return NULL;
2372
2373 if (itype == NULL_TREE)
2374 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2375 TYPE_UNSIGNED (type));
2376
2377 if (itype == NULL_TREE
2378 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2379 return NULL;
2380
2381 vecitype = get_vectype_for_scalar_type (itype);
2382 if (vecitype == NULL_TREE)
2383 return NULL;
2384
2385 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2386 return NULL;
2387
2388 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2389 {
2390 if ((TREE_CODE (then_clause) == INTEGER_CST
2391 && !int_fits_type_p (then_clause, itype))
2392 || (TREE_CODE (else_clause) == INTEGER_CST
2393 && !int_fits_type_p (else_clause, itype)))
2394 return NULL;
2395 }
2396
2397 def_stmt
2398 = gimple_build_assign_with_ops (COND_EXPR,
2399 vect_recog_temp_ssa_var (itype, NULL),
2400 unshare_expr (cond_expr),
2401 fold_convert (itype, then_clause),
2402 fold_convert (itype, else_clause));
2403 pattern_stmt
2404 = gimple_build_assign_with_ops (NOP_EXPR,
2405 vect_recog_temp_ssa_var (type, NULL),
2406 gimple_assign_lhs (def_stmt), NULL_TREE);
2407
2408 new_pattern_def_seq (stmt_vinfo, def_stmt);
2409 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2410 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2411 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2412 *type_in = vecitype;
2413 *type_out = vectype;
2414
2415 if (dump_enabled_p ())
2416 dump_printf_loc (MSG_NOTE, vect_location,
2417 "vect_recog_mixed_size_cond_pattern: detected: ");
2418
2419 return pattern_stmt;
2420 }
2421
2422
2423 /* Helper function of vect_recog_bool_pattern. Called recursively; returns
2424 true if bool VAR can be rewritten as described there. */
2425
2426 static bool
2427 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2428 {
2429 gimple def_stmt;
2430 enum vect_def_type dt;
2431 tree def, rhs1;
2432 enum tree_code rhs_code;
2433
2434 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2435 &dt))
2436 return false;
2437
2438 if (dt != vect_internal_def)
2439 return false;
2440
2441 if (!is_gimple_assign (def_stmt))
2442 return false;
2443
2444 if (!has_single_use (def))
2445 return false;
2446
2447 rhs1 = gimple_assign_rhs1 (def_stmt);
2448 rhs_code = gimple_assign_rhs_code (def_stmt);
2449 switch (rhs_code)
2450 {
2451 case SSA_NAME:
2452 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2453
2454 CASE_CONVERT:
2455 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2456 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2457 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2458 return false;
2459 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2460
2461 case BIT_NOT_EXPR:
2462 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2463
2464 case BIT_AND_EXPR:
2465 case BIT_IOR_EXPR:
2466 case BIT_XOR_EXPR:
2467 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2468 return false;
2469 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2470 bb_vinfo);
2471
2472 default:
2473 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2474 {
2475 tree vecitype, comp_vectype;
2476
2477 /* If the comparison can throw, then is_gimple_condexpr will be
2478 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2479 if (stmt_could_throw_p (def_stmt))
2480 return false;
2481
2482 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2483 if (comp_vectype == NULL_TREE)
2484 return false;
2485
2486 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2487 {
2488 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2489 tree itype
2490 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2491 vecitype = get_vectype_for_scalar_type (itype);
2492 if (vecitype == NULL_TREE)
2493 return false;
2494 }
2495 else
2496 vecitype = comp_vectype;
2497 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2498 }
2499 return false;
2500 }
2501 }
2502
2503
2504 /* Helper function of adjust_bool_pattern. Add a cast to TYPE of a previous
2505 stmt's result (SSA_NAME_DEF_STMT of VAR) by moving its COND_EXPR from
2506 RELATED_STMT to PATTERN_DEF_SEQ and making the cast the new RELATED_STMT. */
2507
2508 static tree
2509 adjust_bool_pattern_cast (tree type, tree var)
2510 {
2511 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2512 gimple cast_stmt, pattern_stmt;
2513
2514 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2515 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2516 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2517 cast_stmt
2518 = gimple_build_assign_with_ops (NOP_EXPR,
2519 vect_recog_temp_ssa_var (type, NULL),
2520 gimple_assign_lhs (pattern_stmt),
2521 NULL_TREE);
2522 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2523 return gimple_assign_lhs (cast_stmt);
2524 }
2525
2526
2527 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2528 recursively. VAR is an SSA_NAME that should be transformed from bool
2529 to a wider integer type. OUT_TYPE is the desired final integer type of
2530 the whole pattern. TRUEVAL should be NULL unless we are optimizing a
2531 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2532 in the then_clause. STMTS is where statements with added pattern stmts
2533 should be pushed to. */
2534
2535 static tree
2536 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2537 vec<gimple> *stmts)
2538 {
2539 gimple stmt = SSA_NAME_DEF_STMT (var);
2540 enum tree_code rhs_code, def_rhs_code;
2541 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2542 location_t loc;
2543 gimple pattern_stmt, def_stmt;
2544
2545 rhs1 = gimple_assign_rhs1 (stmt);
2546 rhs2 = gimple_assign_rhs2 (stmt);
2547 rhs_code = gimple_assign_rhs_code (stmt);
2548 loc = gimple_location (stmt);
2549 switch (rhs_code)
2550 {
2551 case SSA_NAME:
2552 CASE_CONVERT:
2553 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2554 itype = TREE_TYPE (irhs1);
2555 pattern_stmt
2556 = gimple_build_assign_with_ops (SSA_NAME,
2557 vect_recog_temp_ssa_var (itype, NULL),
2558 irhs1, NULL_TREE);
2559 break;
2560
2561 case BIT_NOT_EXPR:
2562 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2563 itype = TREE_TYPE (irhs1);
2564 pattern_stmt
2565 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2566 vect_recog_temp_ssa_var (itype, NULL),
2567 irhs1, build_int_cst (itype, 1));
2568 break;
2569
2570 case BIT_AND_EXPR:
2571 /* Try to optimize x = y & (a < b ? 1 : 0); into
2572 x = (a < b ? y : 0);
2573
2574 E.g. for:
2575 bool a_b, b_b, c_b;
2576 TYPE d_T;
2577
2578 S1 a_b = x1 CMP1 y1;
2579 S2 b_b = x2 CMP2 y2;
2580 S3 c_b = a_b & b_b;
2581 S4 d_T = (TYPE) c_b;
2582
2583 we would normally emit:
2584
2585 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2586 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2587 S3' c_T = a_T & b_T;
2588 S4' d_T = c_T;
2589
2590 but we can save one stmt by using the
2591 result of one of the COND_EXPRs in the other COND_EXPR and leave
2592 BIT_AND_EXPR stmt out:
2593
2594 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2595 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2596 S4' d_T = c_T;
2597
2598 At least when VEC_COND_EXPR is implemented using masks,
2599 cond ? 1 : 0 is as expensive as cond ? var : 0: in both cases the
2600 comparison mask is computed and then ANDed, in one case with an
2601 all-ones vector, in the other case with a vector register.
2602 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
2603 often more expensive. */
2604 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2605 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2606 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2607 {
2608 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2609 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2610 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2611 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2612 {
2613 gimple tstmt;
2614 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2615 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2616 tstmt = stmts->pop ();
2617 gcc_assert (tstmt == def_stmt);
2618 stmts->quick_push (stmt);
2619 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2620 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2621 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2622 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2623 return irhs2;
2624 }
2625 else
2626 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2627 goto and_ior_xor;
2628 }
2629 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2630 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2631 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2632 {
2633 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2634 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2635 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2636 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2637 {
2638 gimple tstmt;
2639 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2640 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2641 tstmt = stmts->pop ();
2642 gcc_assert (tstmt == def_stmt);
2643 stmts->quick_push (stmt);
2644 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2645 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2646 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2647 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2648 return irhs1;
2649 }
2650 else
2651 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2652 goto and_ior_xor;
2653 }
2654 /* FALLTHRU */
2655 case BIT_IOR_EXPR:
2656 case BIT_XOR_EXPR:
2657 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2658 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2659 and_ior_xor:
2660 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2661 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2662 {
2663 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2664 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2665 int out_prec = TYPE_PRECISION (out_type);
2666 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2667 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2668 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2669 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2670 else
2671 {
2672 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2673 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2674 }
2675 }
2676 itype = TREE_TYPE (irhs1);
2677 pattern_stmt
2678 = gimple_build_assign_with_ops (rhs_code,
2679 vect_recog_temp_ssa_var (itype, NULL),
2680 irhs1, irhs2);
2681 break;
2682
2683 default:
2684 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2685 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2686 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2687 || (TYPE_PRECISION (TREE_TYPE (rhs1))
2688 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
2689 {
2690 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2691 itype
2692 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2693 }
2694 else
2695 itype = TREE_TYPE (rhs1);
2696 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2697 if (trueval == NULL_TREE)
2698 trueval = build_int_cst (itype, 1);
2699 else
2700 gcc_checking_assert (useless_type_conversion_p (itype,
2701 TREE_TYPE (trueval)));
2702 pattern_stmt
2703 = gimple_build_assign_with_ops (COND_EXPR,
2704 vect_recog_temp_ssa_var (itype, NULL),
2705 cond_expr, trueval,
2706 build_int_cst (itype, 0));
2707 break;
2708 }
2709
2710 stmts->safe_push (stmt);
2711 gimple_set_location (pattern_stmt, loc);
2712 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2713 return gimple_assign_lhs (pattern_stmt);
2714 }
2715
2716
2717 /* Function vect_recog_bool_pattern
2718
2719 Try to find pattern like following:
2720
2721 bool a_b, b_b, c_b, d_b, e_b;
2722 TYPE f_T;
2723 loop:
2724 S1 a_b = x1 CMP1 y1;
2725 S2 b_b = x2 CMP2 y2;
2726 S3 c_b = a_b & b_b;
2727 S4 d_b = x3 CMP3 y3;
2728 S5 e_b = c_b | d_b;
2729 S6 f_T = (TYPE) e_b;
2730
2731 where type 'TYPE' is an integral type.
2732
2733 Input:
2734
2735 * LAST_STMT: A stmt at the end from which the pattern
2736 search begins, i.e. cast of a bool to
2737 an integer type.
2738
2739 Output:
2740
2741 * TYPE_IN: The type of the input arguments to the pattern.
2742
2743 * TYPE_OUT: The type of the output of this pattern.
2744
2745 * Return value: A new stmt that will be used to replace the pattern.
2746
2747 Assuming the size of TYPE is the same as the size of all comparisons
2748 (otherwise some casts would be added where needed), for the above
2749 sequence we create the following related pattern stmts:
2750 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2751 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2752 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2753 S5' e_T = c_T | d_T;
2754 S6' f_T = e_T;
2755
2756 Instead of the above S3' we could emit:
2757 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2758 S3' c_T = a_T | b_T;
2759 but the above is more efficient. */
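
/* An illustrative source loop (hypothetical, not from the GCC testsuite)
   that leads to such a sequence:

     int x[N], y[N], z[N], out[N];
     for (i = 0; i < N; i++)
       {
         _Bool a_b = x[i] < y[i];
         _Bool b_b = y[i] < z[i];
         out[i] = a_b & b_b;
       }

   The bool operations are rewritten as integer COND_EXPRs so that the
   loop can be vectorized using VEC_COND_EXPRs.  */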
2760
2761 static gimple
2762 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
2763 tree *type_out)
2764 {
2765 gimple last_stmt = stmts->pop ();
2766 enum tree_code rhs_code;
2767 tree var, lhs, rhs, vectype;
2768 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2769 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2770 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2771 gimple pattern_stmt;
2772
2773 if (!is_gimple_assign (last_stmt))
2774 return NULL;
2775
2776 var = gimple_assign_rhs1 (last_stmt);
2777 lhs = gimple_assign_lhs (last_stmt);
2778
2779 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2780 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2781 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2782 return NULL;
2783
2784 rhs_code = gimple_assign_rhs_code (last_stmt);
2785 if (CONVERT_EXPR_CODE_P (rhs_code))
2786 {
2787 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2788 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2789 return NULL;
2790 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2791 if (vectype == NULL_TREE)
2792 return NULL;
2793
2794 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2795 return NULL;
2796
2797 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2798 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2799 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2800 pattern_stmt
2801 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2802 else
2803 pattern_stmt
2804 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2805 *type_out = vectype;
2806 *type_in = vectype;
2807 stmts->safe_push (last_stmt);
2808 if (dump_enabled_p ())
2809 dump_printf_loc (MSG_NOTE, vect_location,
2810 "vect_recog_bool_pattern: detected: ");
2811
2812 return pattern_stmt;
2813 }
2814 else if (rhs_code == SSA_NAME
2815 && STMT_VINFO_DATA_REF (stmt_vinfo))
2816 {
2817 stmt_vec_info pattern_stmt_info;
2818 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2819 gcc_assert (vectype != NULL_TREE);
2820 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2821 return NULL;
2822 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2823 return NULL;
2824
2825 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2826 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2827 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2828 {
2829 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2830 gimple cast_stmt
2831 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2832 new_pattern_def_seq (stmt_vinfo, cast_stmt);
2833 rhs = rhs2;
2834 }
2835 pattern_stmt
2836 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2837 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2838 bb_vinfo);
2839 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2840 STMT_VINFO_DATA_REF (pattern_stmt_info)
2841 = STMT_VINFO_DATA_REF (stmt_vinfo);
2842 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2843 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2844 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2845 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2846 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2847 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2848 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2849 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2850 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2851 *type_out = vectype;
2852 *type_in = vectype;
2853 stmts->safe_push (last_stmt);
2854 if (dump_enabled_p ())
2855 dump_printf_loc (MSG_NOTE, vect_location,
2856 "vect_recog_bool_pattern: detected: ");
2857 return pattern_stmt;
2858 }
2859 else
2860 return NULL;
2861 }
2862
2863
2864 /* Mark statements that are involved in a pattern. */
2865
2866 static inline void
2867 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2868 tree pattern_vectype)
2869 {
2870 stmt_vec_info pattern_stmt_info, def_stmt_info;
2871 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2872 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2873 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
2874 gimple def_stmt;
2875
2876 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2877 if (pattern_stmt_info == NULL)
2878 {
2879 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2880 bb_vinfo);
2881 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2882 }
2883 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2884
2885 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2886 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2887 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2888 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2889 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2890 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2891 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2892 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2893 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2894 {
2895 gimple_stmt_iterator si;
2896 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2897 !gsi_end_p (si); gsi_next (&si))
2898 {
2899 def_stmt = gsi_stmt (si);
2900 def_stmt_info = vinfo_for_stmt (def_stmt);
2901 if (def_stmt_info == NULL)
2902 {
2903 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
2904 bb_vinfo);
2905 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2906 }
2907 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2908 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2909 STMT_VINFO_DEF_TYPE (def_stmt_info)
2910 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2911 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2912 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2913 }
2914 }
2915 }
2916
2917 /* Function vect_pattern_recog_1
2918
2919 Input:
2920 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
2921 computation pattern.
2922 STMT: A stmt from which the pattern search should start.
2923
2924 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
2925 expression that computes the same functionality and can be used to
2926 replace the sequence of stmts that are involved in the pattern.
2927
2928 Output:
2929 This function checks if the expression returned by PATTERN_RECOG_FUNC is
2930 supported in vector form by the target. We use 'TYPE_IN' to obtain the
2931 relevant vector type. If 'TYPE_IN' is already a vector type, then this
2932 indicates that target support has already been checked by PATTERN_RECOG_FUNC.
2933 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
2934 the available target pattern.
2935
2936 This function also does some bookkeeping, as explained in the documentation
2937 for vect_pattern_recog. */
2938
2939 static void
2940 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
2941 gimple_stmt_iterator si,
2942 vec<gimple> *stmts_to_replace)
2943 {
2944 gimple stmt = gsi_stmt (si), pattern_stmt;
2945 stmt_vec_info stmt_info;
2946 loop_vec_info loop_vinfo;
2947 tree pattern_vectype;
2948 tree type_in, type_out;
2949 enum tree_code code;
2950 int i;
2951 gimple next;
2952
2953 stmts_to_replace->truncate (0);
2954 stmts_to_replace->quick_push (stmt);
2955 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
2956 if (!pattern_stmt)
2957 return;
2958
2959 stmt = stmts_to_replace->last ();
2960 stmt_info = vinfo_for_stmt (stmt);
2961 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2962
2963 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
2964 {
2965 /* No need to check target support (already checked by the pattern
2966 recognition function). */
2967 pattern_vectype = type_out ? type_out : type_in;
2968 }
2969 else
2970 {
2971 enum machine_mode vec_mode;
2972 enum insn_code icode;
2973 optab optab;
2974
2975 /* Check target support */
2976 type_in = get_vectype_for_scalar_type (type_in);
2977 if (!type_in)
2978 return;
2979 if (type_out)
2980 type_out = get_vectype_for_scalar_type (type_out);
2981 else
2982 type_out = type_in;
2983 if (!type_out)
2984 return;
2985 pattern_vectype = type_out;
2986
2987 if (is_gimple_assign (pattern_stmt))
2988 code = gimple_assign_rhs_code (pattern_stmt);
2989 else
2990 {
2991 gcc_assert (is_gimple_call (pattern_stmt));
2992 code = CALL_EXPR;
2993 }
2994
2995 optab = optab_for_tree_code (code, type_in, optab_default);
2996 vec_mode = TYPE_MODE (type_in);
2997 if (!optab
2998 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
2999 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3000 return;
3001 }
3002
3003 /* Found a vectorizable pattern. */
3004 if (dump_enabled_p ())
3005 {
3006 dump_printf_loc (MSG_NOTE, vect_location,
3007 "pattern recognized: ");
3008 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3009 }
3010
3011 /* Mark the stmts that are involved in the pattern. */
3012 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3013
3014 /* Patterns cannot be vectorized using SLP, because they change the order of
3015 computation. */
3016 if (loop_vinfo)
3017 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3018 if (next == stmt)
3019 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3020
3021 /* It is possible that additional pattern stmts are created and inserted in
3022 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3023 relevant statements. */
3024 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3025 && (unsigned) i < (stmts_to_replace->length () - 1);
3026 i++)
3027 {
3028 stmt_info = vinfo_for_stmt (stmt);
3029 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3030 if (dump_enabled_p ())
3031 {
3032 dump_printf_loc (MSG_NOTE, vect_location,
3033 "additional pattern stmt: ");
3034 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3035 }
3036
3037 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3038 }
3039 }
3040
3041
3042 /* Function vect_pattern_recog
3043
3044 Input:
3045 LOOP_VINFO - the loop_vec_info of a loop in which we want to look for
3046 computation idioms.
3047
3048 Output - for each computation idiom that is detected we create a new stmt
3049 that provides the same functionality and that can be vectorized. We
3050 also record some information in the stmt_vec_info of the relevant
3051 stmts, as explained below:
3052
3053 At the entry to this function we have the following stmts, with the
3054 following initial value in the STMT_VINFO fields:
3055
3056 stmt in_pattern_p related_stmt vec_stmt
3057 S1: a_i = .... - - -
3058 S2: a_2 = ..use(a_i).. - - -
3059 S3: a_1 = ..use(a_2).. - - -
3060 S4: a_0 = ..use(a_1).. - - -
3061 S5: ... = ..use(a_0).. - - -
3062
3063 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3064 represented by a single stmt. We then:
3065 - create a new stmt S6 equivalent to the pattern (the stmt is not
3066 inserted into the code)
3067 - fill in the STMT_VINFO fields as follows:
3068
3069 in_pattern_p related_stmt vec_stmt
3070 S1: a_i = .... - - -
3071 S2: a_2 = ..use(a_i).. - - -
3072 S3: a_1 = ..use(a_2).. - - -
3073 S4: a_0 = ..use(a_1).. true S6 -
3074 '---> S6: a_new = .... - S4 -
3075 S5: ... = ..use(a_0).. - - -
3076
3077 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3078 to each other through the RELATED_STMT field).
3079
3080 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3081 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3082 remain irrelevant unless used by stmts other than S4.
3083
3084 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3085 (because they are marked as irrelevant). It will vectorize S6, and record
3086 a pointer to the new vector stmt VS6 from S6 (as usual).
3087 S4 will be skipped, and S5 will be vectorized as usual:
3088
3089 in_pattern_p related_stmt vec_stmt
3090 S1: a_i = .... - - -
3091 S2: a_2 = ..use(a_i).. - - -
3092 S3: a_1 = ..use(a_2).. - - -
3093 > VS6: va_new = .... - - -
3094 S4: a_0 = ..use(a_1).. true S6 VS6
3095 '---> S6: a_new = .... - S4 VS6
3096 > VS5: ... = ..vuse(va_new).. - - -
3097 S5: ... = ..use(a_0).. - - -
3098
3099 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3100 elsewhere), and we'll end up with:
3101
3102 VS6: va_new = ....
3103 VS5: ... = ..vuse(va_new)..
3104
3105 In case of more than one pattern statements, e.g., widen-mult with
3106 intermediate type:
3107
3108 S1 a_t = ;
3109 S2 a_T = (TYPE) a_t;
3110 '--> S3: a_it = (interm_type) a_t;
3111 S4 prod_T = a_T * CONST;
3112 '--> S5: prod_T' = a_it w* CONST;
3113
3114 there may be other users of a_T outside the pattern. In that case S2 will
3115 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3116 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3117 be recorded in S3. */
3118
3119 void
3120 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3121 {
3122 struct loop *loop;
3123 basic_block *bbs;
3124 unsigned int nbbs;
3125 gimple_stmt_iterator si;
3126 unsigned int i, j;
3127 vect_recog_func_ptr vect_recog_func;
3128 vec<gimple> stmts_to_replace;
3129 stmts_to_replace.create (1);
3130 gimple stmt;
3131
3132 if (dump_enabled_p ())
3133 dump_printf_loc (MSG_NOTE, vect_location,
3134 "=== vect_pattern_recog ===");
3135
3136 if (loop_vinfo)
3137 {
3138 loop = LOOP_VINFO_LOOP (loop_vinfo);
3139 bbs = LOOP_VINFO_BBS (loop_vinfo);
3140 nbbs = loop->num_nodes;
3141 }
3142 else
3143 {
3144 bbs = &BB_VINFO_BB (bb_vinfo);
3145 nbbs = 1;
3146 }
3147
3148 /* Scan through the loop stmts, applying the pattern recognition
3149 functions starting at each stmt visited: */
3150 for (i = 0; i < nbbs; i++)
3151 {
3152 basic_block bb = bbs[i];
3153 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3154 {
3155 if (bb_vinfo && (stmt = gsi_stmt (si))
3156 && vinfo_for_stmt (stmt)
3157 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3158 continue;
3159
3160 /* Scan over all generic vect_recog_xxx_pattern functions. */
3161 for (j = 0; j < NUM_PATTERNS; j++)
3162 {
3163 vect_recog_func = vect_vect_recog_func_ptrs[j];
3164 vect_pattern_recog_1 (vect_recog_func, si,
3165 &stmts_to_replace);
3166 }
3167 }
3168 }
3169
3170 stmts_to_replace.release ();
3171 }