/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2013 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "dumpfile.h"

/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
					    tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
					     tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
					   tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
						tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
					      tree *, tree *);
static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
						      tree *, tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *,
					 tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
						  tree *, tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_pow_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_rotate_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_divmod_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}

/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
	return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
	  || gimple_code (stmt2) == GIMPLE_PHI)
	return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}

/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}

/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion or demotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
		   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
			   &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
    *promotion = false;
  else
    return false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
			   bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
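
/* A concrete illustration (an editor's sketch, not from the vectorizer
   sources): with 16-bit inputs and a 32-bit accumulator, a plain C
   dot-product loop such as

     int
     dot_prod (short *x, short *y, int n)
     {
       int i, sum = 0;
       for (i = 0; i < n; i++)
	 sum += x[i] * y[i];
       return sum;
     }

   gimplifies to the S1-S7 shape above (the front end inserts the (int)
   promotions of x[i] and y[i]), so the loop body should be replaced by
   DOT_PROD_EXPR <x_t, y_t, sum_0>.  */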

static gimple
vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr.  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
					       oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}


/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
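
/* Worked example (illustrative numbers chosen by the editor, not from the
   sources): for unsigned char a_t, unsigned int a_T = (unsigned int) a_t
   and res_T = a_T << 9, the shift count 9 fails the HALF_TYPE check
   (9 > TYPE_PRECISION (unsigned char) == 8), but TYPE is 4 times bigger
   than HALF_TYPE, so we retry with the 16-bit intermediate type:
   a_it = (unsigned short) a_t; res_T' = a_it w<< 9.  */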

static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       vec<gimple> *stmts, tree type,
			       tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;
  gimple new_stmt;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
	   && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	      != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
					     TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
	  && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
	  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
	  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
	return false;

      stmts->safe_push (def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      new_oprnd = make_ssa_name (new_type, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
					       NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      stmts->safe_push (def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}


/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type a_t, b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t, b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
*/
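
/* Illustrative source-level example (ours, not from the sources): the
   simplest loop this pattern is meant to match multiplies two narrow
   arrays into a wide result:

     void
     widen_mult (short *a, short *b, int *c, int n)
     {
       int i;
       for (i = 0; i < n; i++)
	 c[i] = a[i] * b[i];
     }

   The implicit (int) promotions of a[i] and b[i] are S3/S4 and the
   product is S5; the whole group would be replaced by
   WIDEN_MULT <a_t, b_t>.  */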

static gimple
vect_recog_widen_mult_pattern (vec<gimple> *stmts,
			       tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
			      &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
	  && TREE_CODE (half_type0) == INTEGER_TYPE
	  && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
					    &oprnd0, stmts, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
	return NULL;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_mult_pattern: detected: ");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
					       oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}


/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
	x = sqrt (x)
*/
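
/* For instance (our sketch, assuming the pow calls resolve to the normal
   <math.h> builtins):

     double f (double y) { return pow (y, 2.0); }
     double g (double y) { return pow (y, 0.5); }

   would be rewritten as y * y and sqrt (y) respectively, both of which
   the vectorizer can handle, whereas a general pow call usually cannot
   be vectorized.  */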

static gimple
vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
	return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((host_integerp (exp, 0)
       && tree_low_cst (exp, 0) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gimple stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}


/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e., we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
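
/* Source-level illustration (ours): summing narrow elements into a wide
   accumulator,

     int
     widen_sum (short *x, int n)
     {
       int i, sum = 0;
       for (i = 0; i < n; i++)
	 sum += x[i];
       return sum;
     }

   Here S2 is the implicit (int) promotion of x[i] and S3 is the addition;
   together they should become WIDEN_SUM <x_t, sum_0>.  */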

static gimple
vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
			  &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
					       oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}


/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be created
         in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
         in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */

static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple *new_def_stmt,
				  vec<gimple> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
	return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
	{
	  /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
	  if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	    return false;

	  interm_type = build_nonstandard_integer_type (
			  TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
	  if (!int_fits_type_p (const_oprnd, interm_type))
	    return false;
	}

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
	 (e.g., if the original value was char, the shift amount is at most 8
	 if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
	return false;

      interm_type = build_nonstandard_integer_type (
		      TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
	break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      interm_type = build_nonstandard_integer_type (
		      TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
           STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
	{
	  /* Replace the original type conversion HALF_TYPE->TYPE with
	     HALF_TYPE->INTERM_TYPE.  */
	  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
	    {
	      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	      /* Check if the already created pattern stmt is what we need.  */
	      if (!is_gimple_assign (new_stmt)
		  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
		  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
		return false;

	      stmts->safe_push (def_stmt);
	      oprnd = gimple_assign_lhs (new_stmt);
	    }
	  else
	    {
	      /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
	      oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type, NULL);
	      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
						       oprnd, NULL_TREE);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
	      stmts->safe_push (def_stmt);
	      oprnd = new_oprnd;
	    }
	}
      else
	{
	  /* Retrieve the operand before the type promotion.  */
	  oprnd = gimple_assign_rhs1 (def_stmt);
	}
    }
  else
    {
      if (interm_type)
	{
	  /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type, NULL);
	  new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
						   oprnd, NULL_TREE);
	  oprnd = new_oprnd;
	  *new_def_stmt = new_stmt;
	}

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}


/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
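
/* Typical C source that produces this shape (our example, not from the
   sources): operations on unsigned chars are promoted to int by the
   usual arithmetic conversions and demoted again on the store,

     void
     over_widen (unsigned char *a, unsigned char *b, int n)
     {
       int i;
       for (i = 0; i < n; i++)
	 b[i] = (a[i] << 2) | 3;
     }

   The shift and the ior are S3/S4 performed in int, the store demotes
   the result (S5), and both operations can in fact be done on unsigned
   char (or on a 16-bit intermediate type, depending on the constants
   and on what shifts the target supports).  */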

static gimple
vect_recog_over_widening_pattern (vec<gimple> *stmts,
				  tree *type_in, tree *type_out)
{
  gimple stmt = stmts->pop ();
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
	  || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
	return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
					     &op0, &op1, &new_def_stmt,
					     stmts))
	{
	  if (first)
	    return NULL;
	  else
	    break;
	}

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
	return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
	return NULL;

      /* We want to collect all the statements for which we create pattern
	 statements, except for the case when the last statement in the
	 sequence doesn't have a corresponding pattern statement.  In such
	 a case we associate the last pattern statement with the last
	 statement in the sequence.  Therefore, we only add the original
	 statement to the list if we know that it is not the last.  */
      if (prev_stmt)
	stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
					op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "created pattern stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
	}

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
	return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
	  || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
	{
	  /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type, NULL);
	  pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
						       var, NULL_TREE);
	  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

	  *type_in = get_vectype_for_scalar_type (new_type);
	  *type_out = get_vectype_for_scalar_type (use_type);

	  /* We created a pattern statement for the last statement in the
	     sequence, so we don't need to associate it with the pattern
	     statement created for PREV_STMT.  Therefore, we add PREV_STMT
	     to the list in order to mark it later in vect_pattern_recog_1.  */
	  if (prev_stmt)
	    stmts->safe_push (prev_stmt);
	}
      else
	{
	  if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	      = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

	  *type_in = vectype;
	  *type_out = NULL_TREE;
	}

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  return pattern_stmt;
}

/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
    in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
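
/* Illustrative source (ours): shifting promoted chars and storing the
   result in a 16-bit array,

     void
     widen_shift (unsigned char *a, unsigned short *r, int n)
     {
       int i;
       for (i = 0; i < n; i++)
	 r[i] = a[i] << 5;
     }

   matches the second form above: the shift is done in int (TYPE), the
   store demotes to unsigned short (result_type), and the whole group
   should become res_result = a_t w<< 5.  */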

static gimple
vect_recog_widen_shift_pattern (vec<gimple> *stmts,
				tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, stmts,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_shift_pattern: detected: ");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Detect a rotate pattern that wouldn't be otherwise vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    with a sequence:

    S1 d_t = -c_t;
    S2 e_t = d_t & (B - 1);
    S3 f_t = b_t << c_t;
    S4 g_t = b_t >> e_t;
    S0 a_t = f_t | g_t;

  where B is element bitsize of type.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */
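
/* In other words (our paraphrase of the S1-S0 sequence): for a 32-bit
   unsigned b_t rotated left by a variable c_t, the emitted sequence
   computes

     a_t = (b_t << c_t) | (b_t >> ((-c_t) & 31))

   with B = 32, which gives the rotate result for any c_t in [0, 31]
   while avoiding a negative or oversized right-shift count.  */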

static gimple
vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (bb_vinfo != NULL || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (bb_vinfo == NULL && dt == vect_internal_def)
	return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
	  || !optab2
	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
	    ext_def = NULL;
	}
    }

  def = NULL_TREE;
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (type))
	def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
					       NULL_TREE);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!host_integerp (def, 1)
	  || (unsigned HOST_WIDE_INT) tree_low_cst (def, 1)
	     >= GET_MODE_PRECISION (TYPE_MODE (type))
	  || integer_zerop (def))
	return NULL;
      def2 = build_int_cst (stype,
			    GET_MODE_PRECISION (TYPE_MODE (type))
			    - tree_low_cst (def, 1));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (stype);
      stmt_vec_info def_stmt_vinfo;

      if (vecstype == NULL_TREE)
	return NULL;
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
					       NULL_TREE);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}

      def2 = vect_recog_temp_ssa_var (stype, NULL);
      tree mask
	= build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
      def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
					       gimple_assign_lhs (def_stmt),
					       mask);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}
    }

  var1 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
					   ? LSHIFT_EXPR : RSHIFT_EXPR,
					   var1, oprnd0, def);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  var2 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
					   ? RSHIFT_EXPR : LSHIFT_EXPR,
					   var2, oprnd0, def2);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_rotate_pattern: detected: ");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type of a different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

  type a_t;
  TYPE b_T, c_T, res_T;

  S0 c_T = ;
  S1 a_t = (type) c_T;
  S2 b_T = ;
  S3 res_T = b_T op a_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has same type on both operands, in the
    second case just b_T op c_T, in the first case with added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */
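
/* A sketch of the first form in C (ours, not from the sources): shifting
   a wide element by a narrower, loop-varying amount,

     void
     vv_shift (long long *a, int *s, int n)
     {
       int i;
       for (i = 0; i < n; i++)
	 a[i] = a[i] << s[i];
     }

   Here the shift amount s[i] has a different mode than a[i], so the
   pattern adds a cast of s[i] to the type of a[i] and emits a shift
   whose two vector operands have the same type.  */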
1749
1750 static gimple
1751 vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
1752 tree *type_in, tree *type_out)
1753 {
1754 gimple last_stmt = stmts->pop ();
1755 tree oprnd0, oprnd1, lhs, var;
1756 gimple pattern_stmt, def_stmt;
1757 enum tree_code rhs_code;
1758 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1759 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1760 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1761 enum vect_def_type dt;
1762 tree def;
1763
1764 if (!is_gimple_assign (last_stmt))
1765 return NULL;
1766
1767 rhs_code = gimple_assign_rhs_code (last_stmt);
1768 switch (rhs_code)
1769 {
1770 case LSHIFT_EXPR:
1771 case RSHIFT_EXPR:
1772 case LROTATE_EXPR:
1773 case RROTATE_EXPR:
1774 break;
1775 default:
1776 return NULL;
1777 }
1778
1779 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1780 return NULL;
1781
1782 lhs = gimple_assign_lhs (last_stmt);
1783 oprnd0 = gimple_assign_rhs1 (last_stmt);
1784 oprnd1 = gimple_assign_rhs2 (last_stmt);
1785 if (TREE_CODE (oprnd0) != SSA_NAME
1786 || TREE_CODE (oprnd1) != SSA_NAME
1787 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
1788 || TYPE_PRECISION (TREE_TYPE (oprnd1))
1789 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
1790 || TYPE_PRECISION (TREE_TYPE (lhs))
1791 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
1792 return NULL;
1793
1794 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1795 &def, &dt))
1796 return NULL;
1797
1798 if (dt != vect_internal_def)
1799 return NULL;
1800
1801 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
1802 *type_out = *type_in;
1803 if (*type_in == NULL_TREE)
1804 return NULL;
1805
1806 def = NULL_TREE;
1807 if (gimple_assign_cast_p (def_stmt))
1808 {
1809 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1810 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
1811 && TYPE_PRECISION (TREE_TYPE (rhs1))
1812 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
1813 def = rhs1;
1814 }
1815
1816 if (def == NULL_TREE)
1817 {
1818 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1819 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1820 NULL_TREE);
1821 new_pattern_def_seq (stmt_vinfo, def_stmt);
1822 }
1823
1824 /* Pattern detected. */
1825 if (dump_enabled_p ())
1826 dump_printf_loc (MSG_NOTE, vect_location,
1827 "vect_recog_vector_vector_shift_pattern: detected: ");
1828
1829 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1830 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1831 pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
1832
1833 if (dump_enabled_p ())
1834 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1835
1836 stmts->safe_push (last_stmt);
1837 return pattern_stmt;
1838 }
1839
1840 /* Detect division or modulo by a constant that wouldn't
1841 otherwise be vectorized:
1842
1843 type a_t, b_t;
1844
1845 S1 a_t = b_t / N;
1846
1847 where type 'type' is an integral type and N is a constant.
1848
1849 Similarly handle modulo by a constant:
1850
1851 S4 a_t = b_t % N;
1852
1853 Input/Output:
1854
1855 * STMTS: Contains a stmt from which the pattern search begins,
1856 i.e. the division stmt.  S1 is replaced, if N is a power
1857 of two constant and type is signed, by:
1858 S3 y_t = b_t < 0 ? N - 1 : 0;
1859 S2 x_t = b_t + y_t;
1860 S1' a_t = x_t >> log2 (N);
1861
1862 S4 is replaced, if N is a power of two constant and
1863 type is signed, by (where *_T temporaries have unsigned type):
1864 S9 y_T = b_t < 0 ? -1U : 0U;
1865 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1866 S7 z_t = (type) z_T;
1867 S6 w_t = b_t + z_t;
1868 S5 x_t = w_t & (N - 1);
1869 S4' a_t = x_t - z_t;
1870
1871 Output:
1872
1873 * TYPE_IN: The type of the input arguments to the pattern.
1874
1875 * TYPE_OUT: The type of the output of this pattern.
1876
1877 * Return value: A new stmt that will be used to replace the division
1878 S1 or modulo S4 stmt. */
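
/* For example, assuming a 32-bit int, b_t / 4 becomes

     y_t = b_t < 0 ? 3 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 2;

   and b_t % 4 becomes

     y_T = b_t < 0 ? -1U : 0U;
     z_T = y_T >> 30;
     z_t = (int) z_T;
     w_t = b_t + z_t;
     x_t = w_t & 3;
     a_t = x_t - z_t;

   both of which truncate towards zero as TRUNC_DIV_EXPR and
   TRUNC_MOD_EXPR require.  */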
1879
1880 static gimple
1881 vect_recog_divmod_pattern (vec<gimple> *stmts,
1882 tree *type_in, tree *type_out)
1883 {
1884 gimple last_stmt = stmts->pop ();
1885 tree oprnd0, oprnd1, vectype, itype, cond;
1886 gimple pattern_stmt, def_stmt;
1887 enum tree_code rhs_code;
1888 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1889 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1890 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1891 optab optab;
1892 tree q;
1893 int dummy_int, prec;
1894 stmt_vec_info def_stmt_vinfo;
1895
1896 if (!is_gimple_assign (last_stmt))
1897 return NULL;
1898
1899 rhs_code = gimple_assign_rhs_code (last_stmt);
1900 switch (rhs_code)
1901 {
1902 case TRUNC_DIV_EXPR:
1903 case TRUNC_MOD_EXPR:
1904 break;
1905 default:
1906 return NULL;
1907 }
1908
1909 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1910 return NULL;
1911
1912 oprnd0 = gimple_assign_rhs1 (last_stmt);
1913 oprnd1 = gimple_assign_rhs2 (last_stmt);
1914 itype = TREE_TYPE (oprnd0);
1915 if (TREE_CODE (oprnd0) != SSA_NAME
1916 || TREE_CODE (oprnd1) != INTEGER_CST
1917 || TREE_CODE (itype) != INTEGER_TYPE
1918 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
1919 return NULL;
1920
1921 vectype = get_vectype_for_scalar_type (itype);
1922 if (vectype == NULL_TREE)
1923 return NULL;
1924
1925 /* If the target can handle vectorized division or modulo natively,
1926 don't attempt to optimize this. */
1927 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
1928 if (optab != unknown_optab)
1929 {
1930 enum machine_mode vec_mode = TYPE_MODE (vectype);
1931 int icode = (int) optab_handler (optab, vec_mode);
1932 if (icode != CODE_FOR_nothing)
1933 return NULL;
1934 }
1935
1936 prec = TYPE_PRECISION (itype);
1937 if (integer_pow2p (oprnd1))
1938 {
1939 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
1940 return NULL;
1941
1942 /* Pattern detected. */
1943 if (dump_enabled_p ())
1944 dump_printf_loc (MSG_NOTE, vect_location,
1945 "vect_recog_divmod_pattern: detected: ");
1946
1947 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
1948 build_int_cst (itype, 0));
1949 if (rhs_code == TRUNC_DIV_EXPR)
1950 {
1951 tree var = vect_recog_temp_ssa_var (itype, NULL);
1952 tree shift;
1953 def_stmt
1954 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
1955 fold_build2 (MINUS_EXPR, itype,
1956 oprnd1,
1957 build_int_cst (itype,
1958 1)),
1959 build_int_cst (itype, 0));
1960 new_pattern_def_seq (stmt_vinfo, def_stmt);
1961 var = vect_recog_temp_ssa_var (itype, NULL);
1962 def_stmt
1963 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
1964 gimple_assign_lhs (def_stmt));
1965 append_pattern_def_seq (stmt_vinfo, def_stmt);
1966
1967 shift = build_int_cst (itype, tree_log2 (oprnd1));
1968 pattern_stmt
1969 = gimple_build_assign_with_ops (RSHIFT_EXPR,
1970 vect_recog_temp_ssa_var (itype,
1971 NULL),
1972 var, shift);
1973 }
1974 else
1975 {
1976 tree signmask;
1977 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1978 if (compare_tree_int (oprnd1, 2) == 0)
1979 {
1980 signmask = vect_recog_temp_ssa_var (itype, NULL);
1981 def_stmt
1982 = gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
1983 build_int_cst (itype, 1),
1984 build_int_cst (itype, 0));
1985 append_pattern_def_seq (stmt_vinfo, def_stmt);
1986 }
1987 else
1988 {
1989 tree utype
1990 = build_nonstandard_integer_type (prec, 1);
1991 tree vecutype = get_vectype_for_scalar_type (utype);
1992 tree shift
1993 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
1994 - tree_log2 (oprnd1));
1995 tree var = vect_recog_temp_ssa_var (utype, NULL);
1996
1997 def_stmt
1998 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
1999 build_int_cst (utype, -1),
2000 build_int_cst (utype, 0));
2001 def_stmt_vinfo
2002 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2003 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2004 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2005 append_pattern_def_seq (stmt_vinfo, def_stmt);
2006 var = vect_recog_temp_ssa_var (utype, NULL);
2007 def_stmt
2008 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
2009 gimple_assign_lhs (def_stmt),
2010 shift);
2011 def_stmt_vinfo
2012 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2013 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2014 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2015 append_pattern_def_seq (stmt_vinfo, def_stmt);
2016 signmask = vect_recog_temp_ssa_var (itype, NULL);
2017 def_stmt
2018 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
2019 NULL_TREE);
2020 append_pattern_def_seq (stmt_vinfo, def_stmt);
2021 }
2022 def_stmt
2023 = gimple_build_assign_with_ops (PLUS_EXPR,
2024 vect_recog_temp_ssa_var (itype,
2025 NULL),
2026 oprnd0, signmask);
2027 append_pattern_def_seq (stmt_vinfo, def_stmt);
2028 def_stmt
2029 = gimple_build_assign_with_ops (BIT_AND_EXPR,
2030 vect_recog_temp_ssa_var (itype,
2031 NULL),
2032 gimple_assign_lhs (def_stmt),
2033 fold_build2 (MINUS_EXPR, itype,
2034 oprnd1,
2035 build_int_cst (itype,
2036 1)));
2037 append_pattern_def_seq (stmt_vinfo, def_stmt);
2038
2039 pattern_stmt
2040 = gimple_build_assign_with_ops (MINUS_EXPR,
2041 vect_recog_temp_ssa_var (itype,
2042 NULL),
2043 gimple_assign_lhs (def_stmt),
2044 signmask);
2045 }
2046
2047 if (dump_enabled_p ())
2048 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2049 0);
2050
2051 stmts->safe_push (last_stmt);
2052
2053 *type_in = vectype;
2054 *type_out = vectype;
2055 return pattern_stmt;
2056 }
2057
2058 if (!host_integerp (oprnd1, TYPE_UNSIGNED (itype))
2059 || integer_zerop (oprnd1)
2060 || prec > HOST_BITS_PER_WIDE_INT)
2061 return NULL;
2062
2063 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2064 return NULL;
2065
2066 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2067
2068 if (TYPE_UNSIGNED (itype))
2069 {
2070 unsigned HOST_WIDE_INT mh, ml;
2071 int pre_shift, post_shift;
2072 unsigned HOST_WIDE_INT d = tree_low_cst (oprnd1, 1)
2073 & GET_MODE_MASK (TYPE_MODE (itype));
2074 tree t1, t2, t3, t4;
2075
2076 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2077 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2078 return NULL;
2079
2080 /* Find a suitable multiplier and right shift count
2081 instead of multiplying with D. */
2082 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2083
2084 /* If the suggested multiplier needs more than PREC bits, we can do better
2085 for even divisors, using an initial right shift. */
2086 if (mh != 0 && (d & 1) == 0)
2087 {
2088 pre_shift = floor_log2 (d & -d);
2089 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2090 &ml, &post_shift, &dummy_int);
2091 gcc_assert (!mh);
2092 }
2093 else
2094 pre_shift = 0;
2095
2096 if (mh != 0)
2097 {
2098 if (post_shift - 1 >= prec)
2099 return NULL;
2100
2101 /* t1 = oprnd0 h* ml;
2102 t2 = oprnd0 - t1;
2103 t3 = t2 >> 1;
2104 t4 = t1 + t3;
2105 q = t4 >> (post_shift - 1); */
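	  /* E.g. for unsigned 32-bit division by 7, choose_multiplier
	     yields ml = 0x24924925 and post_shift = 3 (the exact
	     multiplier needs 33 bits, hence mh != 0), so the emitted
	     sequence computes
	       t1 = oprnd0 h* 0x24924925;
	       q = (t1 + ((oprnd0 - t1) >> 1)) >> 2;  */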
2106 t1 = vect_recog_temp_ssa_var (itype, NULL);
2107 def_stmt
2108 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2109 build_int_cst (itype, ml));
2110 append_pattern_def_seq (stmt_vinfo, def_stmt);
2111
2112 t2 = vect_recog_temp_ssa_var (itype, NULL);
2113 def_stmt
2114 = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
2115 append_pattern_def_seq (stmt_vinfo, def_stmt);
2116
2117 t3 = vect_recog_temp_ssa_var (itype, NULL);
2118 def_stmt
2119 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2120 integer_one_node);
2121 append_pattern_def_seq (stmt_vinfo, def_stmt);
2122
2123 t4 = vect_recog_temp_ssa_var (itype, NULL);
2124 def_stmt
2125 = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
2126
2127 if (post_shift != 1)
2128 {
2129 append_pattern_def_seq (stmt_vinfo, def_stmt);
2130
2131 q = vect_recog_temp_ssa_var (itype, NULL);
2132 pattern_stmt
2133 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
2134 build_int_cst (itype,
2135 post_shift
2136 - 1));
2137 }
2138 else
2139 {
2140 q = t4;
2141 pattern_stmt = def_stmt;
2142 }
2143 }
2144 else
2145 {
2146 if (pre_shift >= prec || post_shift >= prec)
2147 return NULL;
2148
2149 /* t1 = oprnd0 >> pre_shift;
2150 t2 = t1 h* ml;
2151 q = t2 >> post_shift; */
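	  /* E.g. for unsigned 32-bit division by 3, choose_multiplier
	     yields ml = 0xAAAAAAAB and post_shift = 1 with mh == 0 and
	     pre_shift == 0, so the emitted sequence computes
	       t2 = oprnd0 h* 0xAAAAAAAB;
	       q = t2 >> 1;  */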
2152 if (pre_shift)
2153 {
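	      /* A NULL type argument makes build_int_cst use
		 integer_type_node for the shift count.  */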
2154 t1 = vect_recog_temp_ssa_var (itype, NULL);
2155 def_stmt
2156 = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
2157 build_int_cst (NULL,
2158 pre_shift));
2159 append_pattern_def_seq (stmt_vinfo, def_stmt);
2160 }
2161 else
2162 t1 = oprnd0;
2163
2164 t2 = vect_recog_temp_ssa_var (itype, NULL);
2165 def_stmt
2166 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
2167 build_int_cst (itype, ml));
2168
2169 if (post_shift)
2170 {
2171 append_pattern_def_seq (stmt_vinfo, def_stmt);
2172
2173 q = vect_recog_temp_ssa_var (itype, NULL);
2174 def_stmt
2175 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
2176 build_int_cst (itype,
2177 post_shift));
2178 }
2179 else
2180 q = t2;
2181
2182 pattern_stmt = def_stmt;
2183 }
2184 }
2185 else
2186 {
2187 unsigned HOST_WIDE_INT ml;
2188 int post_shift;
2189 HOST_WIDE_INT d = tree_low_cst (oprnd1, 0);
2190 unsigned HOST_WIDE_INT abs_d;
2191 bool add = false;
2192 tree t1, t2, t3, t4;
2193
2194 /* Give up for -1. */
2195 if (d == -1)
2196 return NULL;
2197
2198 /* Since d might be INT_MIN, we have to cast to
2199 unsigned HOST_WIDE_INT before negating to avoid
2200 undefined signed overflow. */
2201 abs_d = (d >= 0
2202 ? (unsigned HOST_WIDE_INT) d
2203 : - (unsigned HOST_WIDE_INT) d);
2204
2205 /* n rem d = n rem -d */
2206 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2207 {
2208 d = abs_d;
2209 oprnd1 = build_int_cst (itype, abs_d);
2210 }
2211 else if (HOST_BITS_PER_WIDE_INT >= prec
2212 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2213 /* This case is not handled correctly below. */
2214 return NULL;
2215
2216 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2217 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2218 {
2219 add = true;
2220 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2221 }
2222 if (post_shift >= prec)
2223 return NULL;
2224
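      /* E.g. for signed 32-bit division by 3, choose_multiplier yields
	 ml = 0x55555556 and post_shift = 0, so for d > 0 the emitted
	 sequence computes
	   q = (oprnd0 h* 0x55555556) - (oprnd0 >> 31);  */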
2225 /* t1 = oprnd0 h* ml; */
2226 t1 = vect_recog_temp_ssa_var (itype, NULL);
2227 def_stmt
2228 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2229 build_int_cst (itype, ml));
2230 append_pattern_def_seq (stmt_vinfo, def_stmt);
2231
2232 if (add)
2233 {
2234 /* t2 = t1 + oprnd0; */
2235 t2 = vect_recog_temp_ssa_var (itype, NULL);
2236 def_stmt
2237 = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
2238 append_pattern_def_seq (stmt_vinfo, def_stmt);
2239 }
2240 else
2241 t2 = t1;
2242
2243 if (post_shift)
2244 {
2245 /* t3 = t2 >> post_shift; */
2246 t3 = vect_recog_temp_ssa_var (itype, NULL);
2247 def_stmt
2248 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2249 build_int_cst (itype, post_shift));
2250 append_pattern_def_seq (stmt_vinfo, def_stmt);
2251 }
2252 else
2253 t3 = t2;
2254
2255 /* t4 = oprnd0 >> (prec - 1); */
2256 t4 = vect_recog_temp_ssa_var (itype, NULL);
2257 def_stmt
2258 = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
2259 build_int_cst (itype, prec - 1));
2260 append_pattern_def_seq (stmt_vinfo, def_stmt);
2261
2262 /* q = t3 - t4; or q = t4 - t3; */
2263 q = vect_recog_temp_ssa_var (itype, NULL);
2264 pattern_stmt
2265 = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2266 d < 0 ? t3 : t4);
2267 }
2268
2269 if (rhs_code == TRUNC_MOD_EXPR)
2270 {
2271 tree r, t1;
2272
2273 /* We divided. Now finish by:
2274 t1 = q * oprnd1;
2275 r = oprnd0 - t1; */
2276 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2277
2278 t1 = vect_recog_temp_ssa_var (itype, NULL);
2279 def_stmt
2280 = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
2281 append_pattern_def_seq (stmt_vinfo, def_stmt);
2282
2283 r = vect_recog_temp_ssa_var (itype, NULL);
2284 pattern_stmt
2285 = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
2286 }
2287
2288 /* Pattern detected. */
2289 if (dump_enabled_p ())
2290 {
2291 dump_printf_loc (MSG_NOTE, vect_location,
2292 "vect_recog_divmod_pattern: detected: ");
2293 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2294 }
2295
2296 stmts->safe_push (last_stmt);
2297
2298 *type_in = vectype;
2299 *type_out = vectype;
2300 return pattern_stmt;
2301 }
2302
2303 /* Function vect_recog_mixed_size_cond_pattern
2304
2305 Try to find the following pattern:
2306
2307 type x_t, y_t;
2308 TYPE a_T, b_T, c_T;
2309 loop:
2310 S1 a_T = x_t CMP y_t ? b_T : c_T;
2311
2312 where type 'TYPE' is an integral type which has different size
2313 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2314 than 'type', the constants need to fit into an integer type
2315 with the same width as 'type') or results of conversion from 'type'.
2316
2317 Input:
2318
2319 * LAST_STMT: A stmt from which the pattern search begins.
2320
2321 Output:
2322
2323 * TYPE_IN: The type of the input arguments to the pattern.
2324
2325 * TYPE_OUT: The type of the output of this pattern.
2326
2327 * Return value: A new stmt that will be used to replace the pattern.
2328 Additionally a def_stmt is added.
2329
2330 a_it = x_t CMP y_t ? b_it : c_it;
2331 a_T = (TYPE) a_it; */
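
/* For example, assuming 16-bit short and 32-bit int,

     a_T = x_t < y_t ? 3 : 5;

   becomes

     a_it = x_t < y_t ? 3 : 5;
     a_T = (int) a_it;

   where a_it has a 16-bit integral type, so the VEC_COND_EXPR compares
   and selects vectors with equal element sizes.  */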
2332
2333 static gimple
2334 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2335 tree *type_out)
2336 {
2337 gimple last_stmt = (*stmts)[0];
2338 tree cond_expr, then_clause, else_clause;
2339 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2340 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2341 enum machine_mode cmpmode;
2342 gimple pattern_stmt, def_stmt;
2343 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2344 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2345 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2346 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2347 bool promotion;
2348 tree comp_scalar_type;
2349
2350 if (!is_gimple_assign (last_stmt)
2351 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2352 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2353 return NULL;
2354
2355 cond_expr = gimple_assign_rhs1 (last_stmt);
2356 then_clause = gimple_assign_rhs2 (last_stmt);
2357 else_clause = gimple_assign_rhs3 (last_stmt);
2358
2359 if (!COMPARISON_CLASS_P (cond_expr))
2360 return NULL;
2361
2362 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2363 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2364 if (comp_vectype == NULL_TREE)
2365 return NULL;
2366
2367 type = gimple_expr_type (last_stmt);
2368 if (types_compatible_p (type, comp_scalar_type)
2369 || ((TREE_CODE (then_clause) != INTEGER_CST
2370 || TREE_CODE (else_clause) != INTEGER_CST)
2371 && !INTEGRAL_TYPE_P (comp_scalar_type))
2372 || !INTEGRAL_TYPE_P (type))
2373 return NULL;
2374
2375 if ((TREE_CODE (then_clause) != INTEGER_CST
2376 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2377 &def_stmt0, &promotion))
2378 || (TREE_CODE (else_clause) != INTEGER_CST
2379 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2380 &def_stmt1, &promotion)))
2381 return NULL;
2382
2383 if (orig_type0 && orig_type1
2384 && !types_compatible_p (orig_type0, orig_type1))
2385 return NULL;
2386
2387 if (orig_type0)
2388 {
2389 if (!types_compatible_p (orig_type0, comp_scalar_type))
2390 return NULL;
2391 then_clause = gimple_assign_rhs1 (def_stmt0);
2392 itype = orig_type0;
2393 }
2394
2395 if (orig_type1)
2396 {
2397 if (!types_compatible_p (orig_type1, comp_scalar_type))
2398 return NULL;
2399 else_clause = gimple_assign_rhs1 (def_stmt1);
2400 itype = orig_type1;
2401 }
2402
2403 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2404
2405 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2406 return NULL;
2407
2408 vectype = get_vectype_for_scalar_type (type);
2409 if (vectype == NULL_TREE)
2410 return NULL;
2411
2412 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2413 return NULL;
2414
2415 if (itype == NULL_TREE)
2416 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2417 TYPE_UNSIGNED (type));
2418
2419 if (itype == NULL_TREE
2420 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2421 return NULL;
2422
2423 vecitype = get_vectype_for_scalar_type (itype);
2424 if (vecitype == NULL_TREE)
2425 return NULL;
2426
2427 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2428 return NULL;
2429
2430 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2431 {
2432 if ((TREE_CODE (then_clause) == INTEGER_CST
2433 && !int_fits_type_p (then_clause, itype))
2434 || (TREE_CODE (else_clause) == INTEGER_CST
2435 && !int_fits_type_p (else_clause, itype)))
2436 return NULL;
2437 }
2438
2439 def_stmt
2440 = gimple_build_assign_with_ops (COND_EXPR,
2441 vect_recog_temp_ssa_var (itype, NULL),
2442 unshare_expr (cond_expr),
2443 fold_convert (itype, then_clause),
2444 fold_convert (itype, else_clause));
2445 pattern_stmt
2446 = gimple_build_assign_with_ops (NOP_EXPR,
2447 vect_recog_temp_ssa_var (type, NULL),
2448 gimple_assign_lhs (def_stmt), NULL_TREE);
2449
2450 new_pattern_def_seq (stmt_vinfo, def_stmt);
2451 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2452 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2453 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2454 *type_in = vecitype;
2455 *type_out = vectype;
2456
2457 if (dump_enabled_p ())
2458 dump_printf_loc (MSG_NOTE, vect_location,
2459 "vect_recog_mixed_size_cond_pattern: detected: ");
2460
2461 return pattern_stmt;
2462 }
2463
2464
2465 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2466 true if bool VAR can be optimized that way. */
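
/* VAR qualifies if its definition chain consists only of SSA copies,
   conversions from other one-bit or boolean values, BIT_NOT_EXPR, and
   BIT_AND_EXPR/BIT_IOR_EXPR/BIT_XOR_EXPR of such values, terminating in
   comparisons whose operands have a vector type usable in a
   VEC_COND_EXPR.  E.g.

     a_b = x_1 < y_1;
     b_b = x_2 != y_2;
     c_b = a_b & b_b;

   qualifies when both comparisons can be expanded as vector
   conditions.  */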
2467
2468 static bool
2469 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2470 {
2471 gimple def_stmt;
2472 enum vect_def_type dt;
2473 tree def, rhs1;
2474 enum tree_code rhs_code;
2475
2476 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2477 &dt))
2478 return false;
2479
2480 if (dt != vect_internal_def)
2481 return false;
2482
2483 if (!is_gimple_assign (def_stmt))
2484 return false;
2485
2486 if (!has_single_use (def))
2487 return false;
2488
2489 rhs1 = gimple_assign_rhs1 (def_stmt);
2490 rhs_code = gimple_assign_rhs_code (def_stmt);
2491 switch (rhs_code)
2492 {
2493 case SSA_NAME:
2494 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2495
2496 CASE_CONVERT:
2497 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2498 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2499 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2500 return false;
2501 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2502
2503 case BIT_NOT_EXPR:
2504 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2505
2506 case BIT_AND_EXPR:
2507 case BIT_IOR_EXPR:
2508 case BIT_XOR_EXPR:
2509 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2510 return false;
2511 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2512 bb_vinfo);
2513
2514 default:
2515 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2516 {
2517 tree vecitype, comp_vectype;
2518
2519 /* If the comparison can throw, then is_gimple_condexpr will be
2520 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2521 if (stmt_could_throw_p (def_stmt))
2522 return false;
2523
2524 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2525 if (comp_vectype == NULL_TREE)
2526 return false;
2527
2528 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2529 {
2530 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2531 tree itype
2532 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2533 vecitype = get_vectype_for_scalar_type (itype);
2534 if (vecitype == NULL_TREE)
2535 return false;
2536 }
2537 else
2538 vecitype = comp_vectype;
2539 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2540 }
2541 return false;
2542 }
2543 }
2544
2545
2546 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2547 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2548 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2549
2550 static tree
2551 adjust_bool_pattern_cast (tree type, tree var)
2552 {
2553 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2554 gimple cast_stmt, pattern_stmt;
2555
2556 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2557 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2558 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2559 cast_stmt
2560 = gimple_build_assign_with_ops (NOP_EXPR,
2561 vect_recog_temp_ssa_var (type, NULL),
2562 gimple_assign_lhs (pattern_stmt),
2563 NULL_TREE);
2564 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2565 return gimple_assign_lhs (cast_stmt);
2566 }
2567
2568
2569 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2570 recursively.  VAR is an SSA_NAME that should be transformed from bool
2571 to a wider integer type; OUT_TYPE is the desired final integer type of
2572 the whole pattern.  TRUEVAL should be NULL unless we are optimizing
2573 BIT_AND_EXPR into a COND_EXPR that takes one integer from one of the
2574 operands as its then_clause.  STMTS is where statements with added
2575 pattern stmts should be pushed to.  */
2576
2577 static tree
2578 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2579 vec<gimple> *stmts)
2580 {
2581 gimple stmt = SSA_NAME_DEF_STMT (var);
2582 enum tree_code rhs_code, def_rhs_code;
2583 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2584 location_t loc;
2585 gimple pattern_stmt, def_stmt;
2586
2587 rhs1 = gimple_assign_rhs1 (stmt);
2588 rhs2 = gimple_assign_rhs2 (stmt);
2589 rhs_code = gimple_assign_rhs_code (stmt);
2590 loc = gimple_location (stmt);
2591 switch (rhs_code)
2592 {
2593 case SSA_NAME:
2594 CASE_CONVERT:
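      /* A copy or conversion of another bool: adjust the operand and
	 emit a plain copy of the adjusted value.  */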
2595 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2596 itype = TREE_TYPE (irhs1);
2597 pattern_stmt
2598 = gimple_build_assign_with_ops (SSA_NAME,
2599 vect_recog_temp_ssa_var (itype, NULL),
2600 irhs1, NULL_TREE);
2601 break;
2602
2603 case BIT_NOT_EXPR:
2604 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2605 itype = TREE_TYPE (irhs1);
2606 pattern_stmt
2607 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2608 vect_recog_temp_ssa_var (itype, NULL),
2609 irhs1, build_int_cst (itype, 1));
2610 break;
2611
2612 case BIT_AND_EXPR:
2613 /* Try to optimize x = y & (a < b ? 1 : 0); into
2614 x = (a < b ? y : 0);
2615
2616 E.g. for:
2617 bool a_b, b_b, c_b;
2618 TYPE d_T;
2619
2620 S1 a_b = x1 CMP1 y1;
2621 S2 b_b = x2 CMP2 y2;
2622 S3 c_b = a_b & b_b;
2623 S4 d_T = (TYPE) c_b;
2624
2625 we would normally emit:
2626
2627 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2628 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2629 S3' c_T = a_T & b_T;
2630 S4' d_T = c_T;
2631
2632 but we can save one stmt by using the
2633 result of one of the COND_EXPRs in the other COND_EXPR and leave
2634 BIT_AND_EXPR stmt out:
2635
2636 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2637 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2638 S4'  d_T = c_T;
2639
2640 At least when VEC_COND_EXPR is implemented using masks,
2641 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
2642 computes the comparison mask and ANDs it, in one case with an
2643 all-ones vector, in the other case with a vector register.
2644 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
2645 often more expensive.  */
2646 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2647 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2648 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2649 {
2650 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2651 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2652 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2653 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2654 {
2655 gimple tstmt;
2656 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2657 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2658 tstmt = stmts->pop ();
2659 gcc_assert (tstmt == def_stmt);
2660 stmts->quick_push (stmt);
2661 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2662 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2663 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2664 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2665 return irhs2;
2666 }
2667 else
2668 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2669 goto and_ior_xor;
2670 }
2671 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2672 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2673 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2674 {
2675 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2676 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2677 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2678 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2679 {
2680 gimple tstmt;
2681 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2682 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2683 tstmt = stmts->pop ();
2684 gcc_assert (tstmt == def_stmt);
2685 stmts->quick_push (stmt);
2686 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2687 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2688 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2689 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2690 return irhs1;
2691 }
2692 else
2693 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2694 goto and_ior_xor;
2695 }
2696 /* FALLTHRU */
2697 case BIT_IOR_EXPR:
2698 case BIT_XOR_EXPR:
2699 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2700 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2701 and_ior_xor:
2702 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2703 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2704 {
2705 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2706 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2707 int out_prec = TYPE_PRECISION (out_type);
2708 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2709 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2710 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2711 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2712 else
2713 {
2714 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2715 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2716 }
2717 }
2718 itype = TREE_TYPE (irhs1);
2719 pattern_stmt
2720 = gimple_build_assign_with_ops (rhs_code,
2721 vect_recog_temp_ssa_var (itype, NULL),
2722 irhs1, irhs2);
2723 break;
2724
2725 default:
2726 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2727 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2728 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2729 || (TYPE_PRECISION (TREE_TYPE (rhs1))
2730 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
2731 {
2732 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2733 itype
2734 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2735 }
2736 else
2737 itype = TREE_TYPE (rhs1);
2738 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2739 if (trueval == NULL_TREE)
2740 trueval = build_int_cst (itype, 1);
2741 else
2742 gcc_checking_assert (useless_type_conversion_p (itype,
2743 TREE_TYPE (trueval)));
2744 pattern_stmt
2745 = gimple_build_assign_with_ops (COND_EXPR,
2746 vect_recog_temp_ssa_var (itype, NULL),
2747 cond_expr, trueval,
2748 build_int_cst (itype, 0));
2749 break;
2750 }
2751
2752 stmts->safe_push (stmt);
2753 gimple_set_location (pattern_stmt, loc);
2754 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2755 return gimple_assign_lhs (pattern_stmt);
2756 }
2757
2758
2759 /* Function vect_recog_bool_pattern
2760
2761 Try to find a pattern like the following:
2762
2763 bool a_b, b_b, c_b, d_b, e_b;
2764 TYPE f_T;
2765 loop:
2766 S1 a_b = x1 CMP1 y1;
2767 S2 b_b = x2 CMP2 y2;
2768 S3 c_b = a_b & b_b;
2769 S4 d_b = x3 CMP3 y3;
2770 S5 e_b = c_b | d_b;
2771 S6 f_T = (TYPE) e_b;
2772
2773 where type 'TYPE' is an integral type.
2774
2775 Input:
2776
2777 * LAST_STMT: The stmt at the end of the pattern from which
2778 the search begins, i.e. a cast of a bool to
2779 an integer type.
2780
2781 Output:
2782
2783 * TYPE_IN: The type of the input arguments to the pattern.
2784
2785 * TYPE_OUT: The type of the output of this pattern.
2786
2787 * Return value: A new stmt that will be used to replace the pattern.
2788
2789 Assuming the size of TYPE is the same as the size of all comparisons
2790 (otherwise some casts would be added where needed), for the above
2791 sequence we create the following related pattern stmts:
2792 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2793 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2794 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2795 S5' e_T = c_T | d_T;
2796 S6' f_T = e_T;
2797
2798 Instead of the above S3' we could emit:
2799 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2800 S3' c_T = a_T | b_T;
2801 but the above is more efficient. */
2802
2803 static gimple
2804 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
2805 tree *type_out)
2806 {
2807 gimple last_stmt = stmts->pop ();
2808 enum tree_code rhs_code;
2809 tree var, lhs, rhs, vectype;
2810 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2811 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2812 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2813 gimple pattern_stmt;
2814
2815 if (!is_gimple_assign (last_stmt))
2816 return NULL;
2817
2818 var = gimple_assign_rhs1 (last_stmt);
2819 lhs = gimple_assign_lhs (last_stmt);
2820
2821 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2822 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2823 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2824 return NULL;
2825
2826 rhs_code = gimple_assign_rhs_code (last_stmt);
2827 if (CONVERT_EXPR_CODE_P (rhs_code))
2828 {
2829 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2830 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2831 return NULL;
2832 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2833 if (vectype == NULL_TREE)
2834 return NULL;
2835
2836 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2837 return NULL;
2838
2839 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2840 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2841 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2842 pattern_stmt
2843 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2844 else
2845 pattern_stmt
2846 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2847 *type_out = vectype;
2848 *type_in = vectype;
2849 stmts->safe_push (last_stmt);
2850 if (dump_enabled_p ())
2851 dump_printf_loc (MSG_NOTE, vect_location,
2852 "vect_recog_bool_pattern: detected: ");
2853
2854 return pattern_stmt;
2855 }
2856 else if (rhs_code == SSA_NAME
2857 && STMT_VINFO_DATA_REF (stmt_vinfo))
2858 {
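      /* Here the bool is stored directly to memory: rewrite its
	 computation into integer arithmetic of the vector element type
	 and make the new stmt the one the data reference is attached
	 to.  */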
2859 stmt_vec_info pattern_stmt_info;
2860 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2861 gcc_assert (vectype != NULL_TREE);
2862 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2863 return NULL;
2864 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2865 return NULL;
2866
2867 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2868 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2869 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2870 {
2871 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2872 gimple cast_stmt
2873 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2874 new_pattern_def_seq (stmt_vinfo, cast_stmt);
2875 rhs = rhs2;
2876 }
2877 pattern_stmt
2878 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2879 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2880 bb_vinfo);
2881 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2882 STMT_VINFO_DATA_REF (pattern_stmt_info)
2883 = STMT_VINFO_DATA_REF (stmt_vinfo);
2884 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2885 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2886 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2887 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2888 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2889 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2890 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2891 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2892 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2893 *type_out = vectype;
2894 *type_in = vectype;
2895 stmts->safe_push (last_stmt);
2896 if (dump_enabled_p ())
2897 dump_printf_loc (MSG_NOTE, vect_location,
2898 "vect_recog_bool_pattern: detected: ");
2899 return pattern_stmt;
2900 }
2901 else
2902 return NULL;
2903 }
2904
2905
2906 /* Mark statements that are involved in a pattern. */
2907
2908 static inline void
2909 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2910 tree pattern_vectype)
2911 {
2912 stmt_vec_info pattern_stmt_info, def_stmt_info;
2913 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2914 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2915 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
2916 gimple def_stmt;
2917
2918 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2919 if (pattern_stmt_info == NULL)
2920 {
2921 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2922 bb_vinfo);
2923 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2924 }
2925 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2926
2927 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2928 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2929 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2930 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2931 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2932 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2933 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2934 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2935 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2936 {
2937 gimple_stmt_iterator si;
2938 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2939 !gsi_end_p (si); gsi_next (&si))
2940 {
2941 def_stmt = gsi_stmt (si);
2942 def_stmt_info = vinfo_for_stmt (def_stmt);
2943 if (def_stmt_info == NULL)
2944 {
2945 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
2946 bb_vinfo);
2947 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2948 }
2949 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2950 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2951 STMT_VINFO_DEF_TYPE (def_stmt_info)
2952 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2953 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2954 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2955 }
2956 }
2957 }
2958
2959 /* Function vect_pattern_recog_1
2960
2961 Input:
2962 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
2963 computation pattern.
2964 STMT: A stmt from which the pattern search should start.
2965
2966 If PATTERN_RECOG_FUNC successfully detects the pattern, it creates an
2967 expression that computes the same functionality and can be used to
2968 replace the sequence of stmts that are involved in the pattern.
2969
2970 Output:
2971 This function checks if the expression returned by PATTERN_RECOG_FUNC is
2972 supported in vector form by the target. We use 'TYPE_IN' to obtain the
2973 relevant vector type. If 'TYPE_IN' is already a vector type, then this
2974 indicates that target support has already been checked by PATTERN_RECOG_FUNC.
2975 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
2976 the available target pattern.
2977
2978 This function also does some bookkeeping, as explained in the documentation
2979 for vect_pattern_recog.  */
2980
2981 static void
2982 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
2983 gimple_stmt_iterator si,
2984 vec<gimple> *stmts_to_replace)
2985 {
2986 gimple stmt = gsi_stmt (si), pattern_stmt;
2987 stmt_vec_info stmt_info;
2988 loop_vec_info loop_vinfo;
2989 tree pattern_vectype;
2990 tree type_in, type_out;
2991 enum tree_code code;
2992 int i;
2993 gimple next;
2994
2995 stmts_to_replace->truncate (0);
2996 stmts_to_replace->quick_push (stmt);
2997 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
2998 if (!pattern_stmt)
2999 return;
3000
3001 stmt = stmts_to_replace->last ();
3002 stmt_info = vinfo_for_stmt (stmt);
3003 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3004
3005 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3006 {
3007 /* No need to check target support (already checked by the pattern
3008 recognition function). */
3009 pattern_vectype = type_out ? type_out : type_in;
3010 }
3011 else
3012 {
3013 enum machine_mode vec_mode;
3014 enum insn_code icode;
3015 optab optab;
3016
3017 /* Check target support */
3018 type_in = get_vectype_for_scalar_type (type_in);
3019 if (!type_in)
3020 return;
3021 if (type_out)
3022 type_out = get_vectype_for_scalar_type (type_out);
3023 else
3024 type_out = type_in;
3025 if (!type_out)
3026 return;
3027 pattern_vectype = type_out;
3028
3029 if (is_gimple_assign (pattern_stmt))
3030 code = gimple_assign_rhs_code (pattern_stmt);
3031 else
3032 {
3033 gcc_assert (is_gimple_call (pattern_stmt));
3034 code = CALL_EXPR;
3035 }
3036
3037 optab = optab_for_tree_code (code, type_in, optab_default);
3038 vec_mode = TYPE_MODE (type_in);
3039 if (!optab
3040 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3041 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3042 return;
3043 }
3044
3045 /* Found a vectorizable pattern. */
3046 if (dump_enabled_p ())
3047 {
3048 dump_printf_loc (MSG_NOTE, vect_location,
3049 "pattern recognized: ");
3050 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3051 }
3052
3053 /* Mark the stmts that are involved in the pattern. */
3054 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3055
3056 /* Patterns cannot be vectorized using SLP, because they change the order of
3057 computation. */
3058 if (loop_vinfo)
3059 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3060 if (next == stmt)
3061 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3062
3063 /* It is possible that additional pattern stmts are created and inserted in
3064 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3065 relevant statements. */
3066 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3067 && (unsigned) i < (stmts_to_replace->length () - 1);
3068 i++)
3069 {
3070 stmt_info = vinfo_for_stmt (stmt);
3071 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3072 if (dump_enabled_p ())
3073 {
3074 dump_printf_loc (MSG_NOTE, vect_location,
3075 "additional pattern stmt: ");
3076 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3077 }
3078
3079 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3080 }
3081 }
3082
3083
3084 /* Function vect_pattern_recog
3085
3086 Input:
3087 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
3088 computation idioms.
3089
3090 Output - for each computation idiom that is detected we create a new stmt
3091 that provides the same functionality and that can be vectorized. We
3092 also record some information in the stmt_vec_info of the relevant
3093 stmts, as explained below:
3094
3095 At the entry to this function we have the following stmts, with the
3096 following initial value in the STMT_VINFO fields:
3097
3098 stmt in_pattern_p related_stmt vec_stmt
3099 S1: a_i = .... - - -
3100 S2: a_2 = ..use(a_i).. - - -
3101 S3: a_1 = ..use(a_2).. - - -
3102 S4: a_0 = ..use(a_1).. - - -
3103 S5: ... = ..use(a_0).. - - -
3104
3105 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3106 represented by a single stmt. We then:
3107 - create a new stmt S6 equivalent to the pattern (the stmt is not
3108 inserted into the code)
3109 - fill in the STMT_VINFO fields as follows:
3110
3111 in_pattern_p related_stmt vec_stmt
3112 S1: a_i = .... - - -
3113 S2: a_2 = ..use(a_i).. - - -
3114 S3: a_1 = ..use(a_2).. - - -
3115 S4: a_0 = ..use(a_1).. true S6 -
3116 '---> S6: a_new = .... - S4 -
3117 S5: ... = ..use(a_0).. - - -
3118
3119 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3120 to each other through the RELATED_STMT field).
3121
3122 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3123 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3124 remain irrelevant unless used by stmts other than S4.
3125
3126 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3127 (because they are marked as irrelevant). It will vectorize S6, and record
3128 a pointer to the new vector stmt VS6 from S6 (as usual).
3129 S4 will be skipped, and S5 will be vectorized as usual:
3130
3131 in_pattern_p related_stmt vec_stmt
3132 S1: a_i = .... - - -
3133 S2: a_2 = ..use(a_i).. - - -
3134 S3: a_1 = ..use(a_2).. - - -
3135 > VS6: va_new = .... - - -
3136 S4: a_0 = ..use(a_1).. true S6 VS6
3137 '---> S6: a_new = .... - S4 VS6
3138 > VS5: ... = ..vuse(va_new).. - - -
3139 S5: ... = ..use(a_0).. - - -
3140
3141 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3142 elsewhere), and we'll end up with:
3143
3144 VS6: va_new = ....
3145 VS5: ... = ..vuse(va_new)..
3146
3147 In the case of more than one pattern statement, e.g., widen-mult with
3148 an intermediate type:
3149
3150 S1 a_t = ;
3151 S2 a_T = (TYPE) a_t;
3152 '--> S3: a_it = (interm_type) a_t;
3153 S4 prod_T = a_T * CONST;
3154 '--> S5: prod_T' = a_it w* CONST;
3155
3156 there may be other users of a_T outside the pattern. In that case S2 will
3157 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3158 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3159 be recorded in S3. */
3160
3161 void
3162 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3163 {
3164 struct loop *loop;
3165 basic_block *bbs;
3166 unsigned int nbbs;
3167 gimple_stmt_iterator si;
3168 unsigned int i, j;
3169 vect_recog_func_ptr vect_recog_func;
3170 vec<gimple> stmts_to_replace;
3171 stmts_to_replace.create (1);
3172 gimple stmt;
3173
3174 if (dump_enabled_p ())
3175 dump_printf_loc (MSG_NOTE, vect_location,
3176 "=== vect_pattern_recog ===");
3177
3178 if (loop_vinfo)
3179 {
3180 loop = LOOP_VINFO_LOOP (loop_vinfo);
3181 bbs = LOOP_VINFO_BBS (loop_vinfo);
3182 nbbs = loop->num_nodes;
3183 }
3184 else
3185 {
3186 bbs = &BB_VINFO_BB (bb_vinfo);
3187 nbbs = 1;
3188 }
3189
3190 /* Scan through the loop stmts, applying the pattern recognition
3191 functions starting at each stmt visited: */
3192 for (i = 0; i < nbbs; i++)
3193 {
3194 basic_block bb = bbs[i];
3195 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3196 {
3197 if (bb_vinfo && (stmt = gsi_stmt (si))
3198 && vinfo_for_stmt (stmt)
3199 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3200 continue;
3201
3202 /* Scan over all generic vect_recog_xxx_pattern functions. */
3203 for (j = 0; j < NUM_PATTERNS; j++)
3204 {
3205 vect_recog_func = vect_vect_recog_func_ptrs[j];
3206 vect_pattern_recog_1 (vect_recog_func, si,
3207 &stmts_to_replace);
3208 }
3209 }
3210 }
3211
3212 stmts_to_replace.release ();
3213 }