/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2013 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "gimple.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "tree-ssanames.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "dumpfile.h"

/* Pattern recognition functions.  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
                                            tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
                                             tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
                                           tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
                                                tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
                                              tree *, tree *);
static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
                                                      tree *, tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *,
                                         tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
                                                  tree *, tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
  vect_recog_widen_mult_pattern,
  vect_recog_widen_sum_pattern,
  vect_recog_dot_prod_pattern,
  vect_recog_pow_pattern,
  vect_recog_widen_shift_pattern,
  vect_recog_over_widening_pattern,
  vect_recog_rotate_pattern,
  vect_recog_vector_vector_shift_pattern,
  vect_recog_divmod_pattern,
  vect_recog_mixed_size_cond_pattern,
  vect_recog_bool_pattern};

/* Append STMT to the pattern definition sequence of STMT_INFO.  */

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
                                      stmt);
}

/* Start a fresh pattern definition sequence for STMT_INFO, containing
   just STMT.  */

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}

/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
        return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
          || gimple_code (stmt2) == GIMPLE_PHI)
        return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}

/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}

/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion or demotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */
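
/* For instance (an illustrative sketch, not taken from a testcase): given
     USE_STMT:  sum_1 = x_T + sum_0;
   with NAME == x_T defined by
     DEF_STMT:  x_T = (int) x_t;
   where x_t has type short, this returns true with *ORIG_TYPE == short and
   *PROMOTION == true, since int is at least twice as wide as short.  */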

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
                   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
                           &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
    *promotion = false;
  else
    return false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
                           bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
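
/* As an illustration (a sketch, not an actual testcase), a C source loop of
   the shape

     int dot (short *x, short *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
         sum += x[i] * y[i];
       return sum;
     }

   is lowered to the GIMPLE above: x[i] and y[i] are promoted to int (S3/S4),
   multiplied (S5) and accumulated (S7), so the whole sequence can be
   replaced by a single DOT_PROD_EXPR <x_t, y_t, sum_0>.  */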

static gimple
vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
                             tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
          || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
                             &promotion)
          && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr.  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
                              &promotion)
          || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
                              &promotion)
          || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern:  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
                                               oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}


/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */

static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
                               tree const_oprnd, tree *oprnd,
                               vec<gimple> *stmts, tree type,
                               tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;
  gimple new_stmt;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
           && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
              != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      stmts->safe_push (def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      new_oprnd = make_ssa_name (new_type, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
                                               NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      stmts->safe_push (def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}


/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type a_t, b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t, b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>  */
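
/* For example (a sketch; the function and array names are illustrative
   only), a loop such as

     void f (short *a, short *b, int *c, int n)
     {
       for (int i = 0; i < n; i++)
         c[i] = (int) a[i] * (int) b[i];
     }

   is lowered to the S3/S4 promotions feeding the S5 multiplication, and is
   replaced here by WIDEN_MULT_EXPR <a_t, b_t> when the target provides a
   widening multiply (checked via supportable_widening_operation below).  */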

static gimple
vect_recog_widen_mult_pattern (vec<gimple> *stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
                              &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
                                            &oprnd0, stmts, type,
                                            &half_type0, def_stmt0))
        {
          half_type1 = half_type0;
          oprnd1 = fold_convert (half_type1, oprnd1);
        }
      else
        return NULL;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
          || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy_code, &dummy_code,
                                          &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern:  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
                                               oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}


/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)  */
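
/* As a quick illustration (a sketch; assumes the <math.h> built-ins), the
   bodies of

     double g (double x) { return pow (x, 2.0); }
     double h (double x) { return pow (x, 0.5); }

   become x * x and sqrt (x) respectively, which the vectorizer can then
   handle like any other multiplication or vectorizable call.  */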

static gimple
vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
                        tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
          && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
        {
          gimple stmt = gimple_build_call (newfn, 1, base);
          if (vectorizable_function (stmt, *type_in, *type_in)
              != NULL_TREE)
            {
              var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
              gimple_call_set_lhs (stmt, var);
              return stmt;
            }
        }
    }

  return NULL;
}


/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
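
/* For instance (illustrative only), a reduction such as

     int sum_shorts (short *p, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
         sum += p[i];
       return sum;
     }

   has each p[i] promoted to int (S2) before the accumulation (S3), and is
   replaced here by WIDEN_SUM_EXPR <x_t, sum_0>.  */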

static gimple
vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
                              tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
                          &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern:  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
                                               oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}


/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be created
         in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
         in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */

static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  vec<gimple> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
                              &promotion)
          || !promotion
          || !vect_same_loop_or_bb_p (stmt, def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
        {
          /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
          if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
            return false;

          interm_type = build_nonstandard_integer_type (
                          TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
          if (!int_fits_type_p (const_oprnd, interm_type))
            return false;
        }

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
        return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
         (e.g., if the original value was char, the shift amount is at most 8
         if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
        return false;

      interm_type = build_nonstandard_integer_type (
                      TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
        return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
        break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
        return false;

      interm_type = build_nonstandard_integer_type (
                      TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
        return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
           STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

              stmts->safe_push (def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              new_oprnd = make_ssa_name (interm_type, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              stmts->safe_push (def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          new_oprnd = make_ssa_name (interm_type, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}


/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE'; it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
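
/* A typical source (a sketch, names illustrative) is byte data operated on
   in int, due to the C integer promotions, and then stored back as bytes:

     void f (unsigned char *a, unsigned char *b, int n)
     {
       for (int i = 0; i < n; i++)
         b[i] = (a[i] << 1) | 3;
     }

   Here a[i] is promoted to int, shifted and ior-ed in int, and the result is
   demoted back to unsigned char (S5), so the shift and the ior can be done
   on a narrower type than int.  */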

static gimple
vect_recog_over_widening_pattern (vec<gimple> *stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = stmts->pop ();
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
        = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
                                        op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "created pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
          || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
        return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          new_oprnd = make_ssa_name (use_type, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            stmts->safe_push (prev_stmt);
        }
      else
        {
          if (prev_stmt)
            STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
              = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}

/* Detect widening shift pattern:

     type a_t;
     TYPE a_T, res_T;

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
     S3  res_T = a_T << CONST;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect cases where the shift result is immediately converted
   to another type 'result_type' that is no larger in size than 'TYPE'.
   In those cases we perform a widen-shift that directly results in
   'result_type', to avoid a possible over-widening situation:

     type a_t;
     TYPE a_T, res_T;
     result_type res_result;

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
     S3  res_T = a_T << CONST;
     S4  res_result = (result_type) res_T;
           '--> res_result' = a_t w<< CONST;

   And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
   create an additional pattern stmt for S2 to create a variable of an
   intermediate type, and perform widen-shift on the intermediate type:

     type a_t;
     interm_type a_it;
     TYPE a_T, res_T, res_T';

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S3  res_T = a_T << CONST;
           '--> res_T' = a_it <<* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.
   In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
   in STMTS.  When an intermediate type is used and a pattern statement is
   created for S2, we also put S2 here (before S3).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
     WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
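
/* Illustration (a sketch; the names are made up): a loop like

     void f (short *a, int *b, int n)
     {
       for (int i = 0; i < n; i++)
         b[i] = (int) a[i] << 8;
     }

   promotes a[i] to int (S2) before the shift (S3), and is replaced here by
   WIDEN_LSHIFT_EXPR <a_t, 8> when the target supports a widening shift.  */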

static gimple
vect_recog_widen_shift_pattern (vec<gimple> *stmts,
                                tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
          && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
        {
          last_stmt = use_stmt;
          type = use_type;
        }
    }

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
                                      &oprnd0, stmts,
                                      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy_code, &dummy_code,
                                          &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Detect a rotate pattern that wouldn't be otherwise vectorized:

     type a_t, b_t, c_t;

     S0 a_t = b_t r<< c_t;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins,
   i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
   with a sequence:

     S1 d_t = -c_t;
     S2 e_t = d_t & (B - 1);
     S3 f_t = b_t << c_t;
     S4 g_t = b_t >> e_t;
     S0 a_t = f_t | g_t;

   where B is element bitsize of type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the rotate
   S0 stmt.  */
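
/* Illustration (a sketch only): on a target without vector rotates, a loop
   whose body GCC has already folded into a rotate, such as

     void rot (unsigned int *a, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] = (a[i] << 3) | (a[i] >> 29);
     }

   (recognized earlier as a[i] r<< 3) is expanded back here into the S1-S4
   shift/mask sequence above, so it can be vectorized with ordinary vector
   shifts and a vector ior.  */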

static gimple
vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
                           &def, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (bb_vinfo != NULL || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
          && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
        return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (bb_vinfo == NULL && dt == vect_internal_def)
        return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
          || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
          || !optab2
          || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
        return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
        {
          basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
          if (bb == NULL
              || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
            ext_def = NULL;
        }
    }

  def = NULL_TREE;
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
          && TYPE_PRECISION (TREE_TYPE (rhs1))
             == TYPE_PRECISION (type))
        def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
                                               NULL_TREE);
      if (ext_def)
        {
          basic_block new_bb
            = gsi_insert_on_edge_immediate (ext_def, def_stmt);
          gcc_assert (!new_bb);
        }
      else
        append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!tree_fits_uhwi_p (def)
          || (unsigned HOST_WIDE_INT) tree_to_uhwi (def)
             >= GET_MODE_PRECISION (TYPE_MODE (type))
          || integer_zerop (def))
        return NULL;
      def2 = build_int_cst (stype,
                            GET_MODE_PRECISION (TYPE_MODE (type))
                            - tree_to_uhwi (def));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (stype);
      stmt_vec_info def_stmt_vinfo;

      if (vecstype == NULL_TREE)
        return NULL;
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
                                               NULL_TREE);
      if (ext_def)
        {
          basic_block new_bb
            = gsi_insert_on_edge_immediate (ext_def, def_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
          set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
          STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
          append_pattern_def_seq (stmt_vinfo, def_stmt);
        }

      def2 = vect_recog_temp_ssa_var (stype, NULL);
      tree mask
        = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
      def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
                                               gimple_assign_lhs (def_stmt),
                                               mask);
      if (ext_def)
        {
          basic_block new_bb
            = gsi_insert_on_edge_immediate (ext_def, def_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
          set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
          STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
          append_pattern_def_seq (stmt_vinfo, def_stmt);
        }
    }

  var1 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
                                           ? LSHIFT_EXPR : RSHIFT_EXPR,
                                           var1, oprnd0, def);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  var2 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
                                           ? RSHIFT_EXPR : LSHIFT_EXPR,
                                           var2, oprnd0, def2);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_rotate_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

     type a_t;
     TYPE b_T, res_T;

     S1 a_t = ;
     S2 b_T = ;
     S3 res_T = b_T op a_t;

   where type 'TYPE' is a type with a different size than 'type',
   and op is <<, >> or rotate.

   Also detect cases:

     type a_t;
     TYPE b_T, c_T, res_T;

     S0 c_T = ;
     S1 a_t = (type) c_T;
     S2 b_T = ;
     S3 res_T = b_T op a_t;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins,
   i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
   with a shift/rotate which has the same type on both operands, in the
   second case just b_T op c_T, in the first case with added cast
   from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the shift/rotate
   S3 stmt.  */
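
/* Illustration (a sketch): shifting 64-bit values by 32-bit counts, as in

     void f (long long *a, long long *b, int *c, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] = b[i] << c[i];
     }

   leaves the shift count in a narrower type than the shifted value; the
   pattern inserts a cast of the count to the value's type so that a
   vector/vector shift with matching operand types can be used.  */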

static gimple
vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
                                        tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
         != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
         != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
                           &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
          && TYPE_PRECISION (TREE_TYPE (rhs1))
             == TYPE_PRECISION (TREE_TYPE (oprnd0)))
        def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
                                               NULL_TREE);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_vector_vector_shift_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

1848 /* Detect a signed division by a constant that wouldn't be
1849 otherwise vectorized:
1850
1851 type a_t, b_t;
1852
1853 S1 a_t = b_t / N;
1854
1855 where type 'type' is an integral type and N is a constant.
1856
1857 Similarly handle modulo by a constant:
1858
1859 S4 a_t = b_t % N;
1860
1861 Input/Output:
1862
1863 * STMTS: Contains a stmt from which the pattern search begins,
1864 i.e. the division stmt. S1 is replaced, if N is a power
1865 of two constant and type is signed, by:
1866 S3 y_t = b_t < 0 ? N - 1 : 0;
1867 S2 x_t = b_t + y_t;
1868 S1' a_t = x_t >> log2 (N);
1869
1870 S4 is replaced, if N is a power of two constant and
1871 type is signed, by (where *_T temporaries have unsigned type):
1872 S9 y_T = b_t < 0 ? -1U : 0U;
1873 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1874 S7 z_t = (type) z_T;
1875 S6 w_t = b_t + z_t;
1876 S5 x_t = w_t & (N - 1);
1877 S4' a_t = x_t - z_t;
1878
1879 Output:
1880
1881 * TYPE_IN: The type of the input arguments to the pattern.
1882
1883 * TYPE_OUT: The type of the output of this pattern.
1884
1885 * Return value: A new stmt that will be used to replace the division
1886 S1 or modulo S4 stmt. */
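
/* Worked example (illustrative only, assuming 32-bit int): "a = b / 8"
   becomes

     y = b < 0 ? 7 : 0;
     x = b + y;
     a = x >> 3;

   which truncates toward zero as C requires (a bare arithmetic shift
   would round toward minus infinity), and "a = b % 8" becomes

     z = (int) ((unsigned) (b < 0 ? -1 : 0) >> 29);
     a = ((b + z) & 7) - z;

   where z is 7 for negative b and 0 otherwise; e.g. b == -9 gives
   z == 7, (b + z) & 7 == 6 and a == -1, matching -9 % 8.  */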
1887
1888 static gimple
1889 vect_recog_divmod_pattern (vec<gimple> *stmts,
1890 tree *type_in, tree *type_out)
1891 {
1892 gimple last_stmt = stmts->pop ();
1893 tree oprnd0, oprnd1, vectype, itype, cond;
1894 gimple pattern_stmt, def_stmt;
1895 enum tree_code rhs_code;
1896 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1897 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1898 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1899 optab optab;
1900 tree q;
1901 int dummy_int, prec;
1902 stmt_vec_info def_stmt_vinfo;
1903
1904 if (!is_gimple_assign (last_stmt))
1905 return NULL;
1906
1907 rhs_code = gimple_assign_rhs_code (last_stmt);
1908 switch (rhs_code)
1909 {
1910 case TRUNC_DIV_EXPR:
1911 case TRUNC_MOD_EXPR:
1912 break;
1913 default:
1914 return NULL;
1915 }
1916
1917 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1918 return NULL;
1919
1920 oprnd0 = gimple_assign_rhs1 (last_stmt);
1921 oprnd1 = gimple_assign_rhs2 (last_stmt);
1922 itype = TREE_TYPE (oprnd0);
1923 if (TREE_CODE (oprnd0) != SSA_NAME
1924 || TREE_CODE (oprnd1) != INTEGER_CST
1925 || TREE_CODE (itype) != INTEGER_TYPE
1926 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
1927 return NULL;
1928
1929 vectype = get_vectype_for_scalar_type (itype);
1930 if (vectype == NULL_TREE)
1931 return NULL;
1932
1933 /* If the target can handle vectorized division or modulo natively,
1934 don't attempt to optimize this. */
1935 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
1936 if (optab != unknown_optab)
1937 {
1938 enum machine_mode vec_mode = TYPE_MODE (vectype);
1939 int icode = (int) optab_handler (optab, vec_mode);
1940 if (icode != CODE_FOR_nothing)
1941 return NULL;
1942 }
1943
1944 prec = TYPE_PRECISION (itype);
1945 if (integer_pow2p (oprnd1))
1946 {
1947 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
1948 return NULL;
1949
1950 /* Pattern detected. */
1951 if (dump_enabled_p ())
1952 dump_printf_loc (MSG_NOTE, vect_location,
1953 "vect_recog_divmod_pattern: detected:\n");
1954
1955 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
1956 build_int_cst (itype, 0));
1957 if (rhs_code == TRUNC_DIV_EXPR)
1958 {
1959 tree var = vect_recog_temp_ssa_var (itype, NULL);
1960 tree shift;
1961 def_stmt
1962 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
1963 fold_build2 (MINUS_EXPR, itype,
1964 oprnd1,
1965 build_int_cst (itype,
1966 1)),
1967 build_int_cst (itype, 0));
1968 new_pattern_def_seq (stmt_vinfo, def_stmt);
1969 var = vect_recog_temp_ssa_var (itype, NULL);
1970 def_stmt
1971 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
1972 gimple_assign_lhs (def_stmt));
1973 append_pattern_def_seq (stmt_vinfo, def_stmt);
1974
1975 shift = build_int_cst (itype, tree_log2 (oprnd1));
1976 pattern_stmt
1977 = gimple_build_assign_with_ops (RSHIFT_EXPR,
1978 vect_recog_temp_ssa_var (itype,
1979 NULL),
1980 var, shift);
1981 }
1982 else
1983 {
1984 tree signmask;
1985 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1986 if (compare_tree_int (oprnd1, 2) == 0)
1987 {
1988 signmask = vect_recog_temp_ssa_var (itype, NULL);
1989 def_stmt
1990 = gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
1991 build_int_cst (itype, 1),
1992 build_int_cst (itype, 0));
1993 append_pattern_def_seq (stmt_vinfo, def_stmt);
1994 }
1995 else
1996 {
1997 tree utype
1998 = build_nonstandard_integer_type (prec, 1);
1999 tree vecutype = get_vectype_for_scalar_type (utype);
2000 tree shift
2001 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2002 - tree_log2 (oprnd1));
2003 tree var = vect_recog_temp_ssa_var (utype, NULL);
2004
2005 def_stmt
2006 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2007 build_int_cst (utype, -1),
2008 build_int_cst (utype, 0));
2009 def_stmt_vinfo
2010 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2011 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2012 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2013 append_pattern_def_seq (stmt_vinfo, def_stmt);
2014 var = vect_recog_temp_ssa_var (utype, NULL);
2015 def_stmt
2016 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
2017 gimple_assign_lhs (def_stmt),
2018 shift);
2019 def_stmt_vinfo
2020 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2021 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2022 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2023 append_pattern_def_seq (stmt_vinfo, def_stmt);
2024 signmask = vect_recog_temp_ssa_var (itype, NULL);
2025 def_stmt
2026 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
2027 NULL_TREE);
2028 append_pattern_def_seq (stmt_vinfo, def_stmt);
2029 }
2030 def_stmt
2031 = gimple_build_assign_with_ops (PLUS_EXPR,
2032 vect_recog_temp_ssa_var (itype,
2033 NULL),
2034 oprnd0, signmask);
2035 append_pattern_def_seq (stmt_vinfo, def_stmt);
2036 def_stmt
2037 = gimple_build_assign_with_ops (BIT_AND_EXPR,
2038 vect_recog_temp_ssa_var (itype,
2039 NULL),
2040 gimple_assign_lhs (def_stmt),
2041 fold_build2 (MINUS_EXPR, itype,
2042 oprnd1,
2043 build_int_cst (itype,
2044 1)));
2045 append_pattern_def_seq (stmt_vinfo, def_stmt);
2046
2047 pattern_stmt
2048 = gimple_build_assign_with_ops (MINUS_EXPR,
2049 vect_recog_temp_ssa_var (itype,
2050 NULL),
2051 gimple_assign_lhs (def_stmt),
2052 signmask);
2053 }
2054
2055 if (dump_enabled_p ())
2056 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2057 0);
2058
2059 stmts->safe_push (last_stmt);
2060
2061 *type_in = vectype;
2062 *type_out = vectype;
2063 return pattern_stmt;
2064 }
2065
2066 if (!tree_fits_hwi_p (oprnd1, TYPE_SIGN (itype))
2067 || integer_zerop (oprnd1)
2068 || prec > HOST_BITS_PER_WIDE_INT)
2069 return NULL;
2070
2071 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2072 return NULL;
2073
2074 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2075
2076 if (TYPE_UNSIGNED (itype))
2077 {
2078 unsigned HOST_WIDE_INT mh, ml;
2079 int pre_shift, post_shift;
2080 unsigned HOST_WIDE_INT d = tree_to_uhwi (oprnd1)
2081 & GET_MODE_MASK (TYPE_MODE (itype));
2082 tree t1, t2, t3, t4;
2083
2084 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2085 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2086 return NULL;
2087
2088 /* Find a suitable multiplier and right shift count
2089 to use instead of dividing by D. */
2090 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2091
2092 /* If the suggested multiplier needs more than PREC bits, we can do better
2093 for even divisors, using an initial right shift. */
2094 if (mh != 0 && (d & 1) == 0)
2095 {
2096 pre_shift = floor_log2 (d & -d);
2097 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2098 &ml, &post_shift, &dummy_int);
2099 gcc_assert (!mh);
2100 }
2101 else
2102 pre_shift = 0;
2103
2104 if (mh != 0)
2105 {
2106 if (post_shift - 1 >= prec)
2107 return NULL;
2108
2109 /* t1 = oprnd0 h* ml;
2110 t2 = oprnd0 - t1;
2111 t3 = t2 >> 1;
2112 t4 = t1 + t3;
2113 q = t4 >> (post_shift - 1); */
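/* E.g. for d == 7 and prec == 32 (constants shown for illustration),
   choose_multiplier yields ml == 0x24924925 and post_shift == 3; with
   oprnd0 == 7 this gives t1 == 1, t2 == 6, t3 == 3, t4 == 4 and
   q == 4 >> 2 == 1, as expected. */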
2114 t1 = vect_recog_temp_ssa_var (itype, NULL);
2115 def_stmt
2116 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2117 build_int_cst (itype, ml));
2118 append_pattern_def_seq (stmt_vinfo, def_stmt);
2119
2120 t2 = vect_recog_temp_ssa_var (itype, NULL);
2121 def_stmt
2122 = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
2123 append_pattern_def_seq (stmt_vinfo, def_stmt);
2124
2125 t3 = vect_recog_temp_ssa_var (itype, NULL);
2126 def_stmt
2127 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2128 integer_one_node);
2129 append_pattern_def_seq (stmt_vinfo, def_stmt);
2130
2131 t4 = vect_recog_temp_ssa_var (itype, NULL);
2132 def_stmt
2133 = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
2134
2135 if (post_shift != 1)
2136 {
2137 append_pattern_def_seq (stmt_vinfo, def_stmt);
2138
2139 q = vect_recog_temp_ssa_var (itype, NULL);
2140 pattern_stmt
2141 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
2142 build_int_cst (itype,
2143 post_shift
2144 - 1));
2145 }
2146 else
2147 {
2148 q = t4;
2149 pattern_stmt = def_stmt;
2150 }
2151 }
2152 else
2153 {
2154 if (pre_shift >= prec || post_shift >= prec)
2155 return NULL;
2156
2157 /* t1 = oprnd0 >> pre_shift;
2158 t2 = t1 h* ml;
2159 q = t2 >> post_shift; */
2160 if (pre_shift)
2161 {
2162 t1 = vect_recog_temp_ssa_var (itype, NULL);
2163 def_stmt
2164 = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
2165 build_int_cst (NULL,
2166 pre_shift));
2167 append_pattern_def_seq (stmt_vinfo, def_stmt);
2168 }
2169 else
2170 t1 = oprnd0;
2171
2172 t2 = vect_recog_temp_ssa_var (itype, NULL);
2173 def_stmt
2174 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
2175 build_int_cst (itype, ml));
2176
2177 if (post_shift)
2178 {
2179 append_pattern_def_seq (stmt_vinfo, def_stmt);
2180
2181 q = vect_recog_temp_ssa_var (itype, NULL);
2182 def_stmt
2183 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
2184 build_int_cst (itype,
2185 post_shift));
2186 }
2187 else
2188 q = t2;
2189
2190 pattern_stmt = def_stmt;
2191 }
2192 }
2193 else
2194 {
2195 unsigned HOST_WIDE_INT ml;
2196 int post_shift;
2197 HOST_WIDE_INT d = tree_to_shwi (oprnd1);
2198 unsigned HOST_WIDE_INT abs_d;
2199 bool add = false;
2200 tree t1, t2, t3, t4;
2201
2202 /* Give up for -1. */
2203 if (d == -1)
2204 return NULL;
2205
2206 /* Since d might be INT_MIN, we have to cast to
2207 unsigned HOST_WIDE_INT before negating to avoid
2208 undefined signed overflow. */
2209 abs_d = (d >= 0
2210 ? (unsigned HOST_WIDE_INT) d
2211 : - (unsigned HOST_WIDE_INT) d);
2212
2213 /* n rem d = n rem -d */
2214 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2215 {
2216 d = abs_d;
2217 oprnd1 = build_int_cst (itype, abs_d);
2218 }
2219 else if (HOST_BITS_PER_WIDE_INT >= prec
2220 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2221 /* This case is not handled correctly below. */
2222 return NULL;
2223
2224 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2225 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2226 {
2227 add = true;
2228 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2229 }
2230 if (post_shift >= prec)
2231 return NULL;
2232
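/* E.g. for d == 7 and prec == 32 (constants shown for illustration),
   choose_multiplier yields ml == 0x92492493 and post_shift == 2, and
   the add path below is taken; with oprnd0 == -7 this gives t1 == 2,
   t2 == -5, t3 == -5 >> 2 == -2, t4 == -1 and q == t3 - t4 == -1. */
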
2233 /* t1 = oprnd0 h* ml; */
2234 t1 = vect_recog_temp_ssa_var (itype, NULL);
2235 def_stmt
2236 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2237 build_int_cst (itype, ml));
2238
2239 if (add)
2240 {
2241 /* t2 = t1 + oprnd0; */
2242 append_pattern_def_seq (stmt_vinfo, def_stmt);
2243 t2 = vect_recog_temp_ssa_var (itype, NULL);
2244 def_stmt
2245 = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
2246 }
2247 else
2248 t2 = t1;
2249
2250 if (post_shift)
2251 {
2252 /* t3 = t2 >> post_shift; */
2253 append_pattern_def_seq (stmt_vinfo, def_stmt);
2254 t3 = vect_recog_temp_ssa_var (itype, NULL);
2255 def_stmt
2256 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2257 build_int_cst (itype, post_shift));
2258 }
2259 else
2260 t3 = t2;
2261
2262 widest_int oprnd0_min, oprnd0_max;
2263 int msb = 1;
2264 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2265 {
2266 if (!wi::neg_p (oprnd0_min))
2267 msb = 0;
2268 else if (wi::neg_p (oprnd0_max))
2269 msb = -1;
2270 }
2271
2272 if (msb == 0 && d >= 0)
2273 {
2274 /* q = t3; */
2275 q = t3;
2276 pattern_stmt = def_stmt;
2277 }
2278 else
2279 {
2280 /* t4 = oprnd0 >> (prec - 1);
2281 or if we know from VRP that oprnd0 >= 0
2282 t4 = 0;
2283 or if we know from VRP that oprnd0 < 0
2284 t4 = -1; */
2285 append_pattern_def_seq (stmt_vinfo, def_stmt);
2286 t4 = vect_recog_temp_ssa_var (itype, NULL);
2287 if (msb != 1)
2288 def_stmt
2289 = gimple_build_assign_with_ops (INTEGER_CST,
2290 t4, build_int_cst (itype, msb),
2291 NULL_TREE);
2292 else
2293 def_stmt
2294 = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
2295 build_int_cst (itype, prec - 1));
2296 append_pattern_def_seq (stmt_vinfo, def_stmt);
2297
2298 /* q = t3 - t4; or q = t4 - t3; */
2299 q = vect_recog_temp_ssa_var (itype, NULL);
2300 pattern_stmt
2301 = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2302 d < 0 ? t3 : t4);
2303 }
2304 }
2305
2306 if (rhs_code == TRUNC_MOD_EXPR)
2307 {
2308 tree r, t1;
2309
2310 /* We divided. Now finish by:
2311 t1 = q * oprnd1;
2312 r = oprnd0 - t1; */
2313 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2314
2315 t1 = vect_recog_temp_ssa_var (itype, NULL);
2316 def_stmt
2317 = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
2318 append_pattern_def_seq (stmt_vinfo, def_stmt);
2319
2320 r = vect_recog_temp_ssa_var (itype, NULL);
2321 pattern_stmt
2322 = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
2323 }
2324
2325 /* Pattern detected. */
2326 if (dump_enabled_p ())
2327 {
2328 dump_printf_loc (MSG_NOTE, vect_location,
2329 "vect_recog_divmod_pattern: detected: ");
2330 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2331 dump_printf (MSG_NOTE, "\n");
2332 }
2333
2334 stmts->safe_push (last_stmt);
2335
2336 *type_in = vectype;
2337 *type_out = vectype;
2338 return pattern_stmt;
2339 }
2340
2341 /* Function vect_recog_mixed_size_cond_pattern
2342
2343 Try to find the following pattern:
2344
2345 type x_t, y_t;
2346 TYPE a_T, b_T, c_T;
2347 loop:
2348 S1 a_T = x_t CMP y_t ? b_T : c_T;
2349
2350 where type 'TYPE' is an integral type which has a different size
2351 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2352 than 'type', the constants need to fit into an integer type
2353 with the same width as 'type') or results of conversion from 'type'.
2354
2355 Input:
2356
2357 * LAST_STMT: A stmt from which the pattern search begins.
2358
2359 Output:
2360
2361 * TYPE_IN: The type of the input arguments to the pattern.
2362
2363 * TYPE_OUT: The type of the output of this pattern.
2364
2365 * Return value: A new stmt that will be used to replace the pattern.
2366 Additionally a def_stmt is added.
2367
2368 a_it = x_t CMP y_t ? b_it : c_it;
2369 a_T = (TYPE) a_it; */
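
/* For example (a hypothetical source fragment; the names are made up),
   with 'short *x' and 'int *a', the loop body

     a[i] = x[i] < 0 ? -1 : 1;

   compares 16-bit values but produces 32-bit ones; the pattern selects
   in the 16-bit domain first and widens the selected value, so the
   vector compare and the vector select operate on elements of the
   same width.  */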
2370
2371 static gimple
2372 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2373 tree *type_out)
2374 {
2375 gimple last_stmt = (*stmts)[0];
2376 tree cond_expr, then_clause, else_clause;
2377 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2378 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2379 enum machine_mode cmpmode;
2380 gimple pattern_stmt, def_stmt;
2381 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2382 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2383 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2384 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2385 bool promotion;
2386 tree comp_scalar_type;
2387
2388 if (!is_gimple_assign (last_stmt)
2389 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2390 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2391 return NULL;
2392
2393 cond_expr = gimple_assign_rhs1 (last_stmt);
2394 then_clause = gimple_assign_rhs2 (last_stmt);
2395 else_clause = gimple_assign_rhs3 (last_stmt);
2396
2397 if (!COMPARISON_CLASS_P (cond_expr))
2398 return NULL;
2399
2400 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2401 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2402 if (comp_vectype == NULL_TREE)
2403 return NULL;
2404
2405 type = gimple_expr_type (last_stmt);
2406 if (types_compatible_p (type, comp_scalar_type)
2407 || ((TREE_CODE (then_clause) != INTEGER_CST
2408 || TREE_CODE (else_clause) != INTEGER_CST)
2409 && !INTEGRAL_TYPE_P (comp_scalar_type))
2410 || !INTEGRAL_TYPE_P (type))
2411 return NULL;
2412
2413 if ((TREE_CODE (then_clause) != INTEGER_CST
2414 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2415 &def_stmt0, &promotion))
2416 || (TREE_CODE (else_clause) != INTEGER_CST
2417 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2418 &def_stmt1, &promotion)))
2419 return NULL;
2420
2421 if (orig_type0 && orig_type1
2422 && !types_compatible_p (orig_type0, orig_type1))
2423 return NULL;
2424
2425 if (orig_type0)
2426 {
2427 if (!types_compatible_p (orig_type0, comp_scalar_type))
2428 return NULL;
2429 then_clause = gimple_assign_rhs1 (def_stmt0);
2430 itype = orig_type0;
2431 }
2432
2433 if (orig_type1)
2434 {
2435 if (!types_compatible_p (orig_type1, comp_scalar_type))
2436 return NULL;
2437 else_clause = gimple_assign_rhs1 (def_stmt1);
2438 itype = orig_type1;
2439 }
2440
2441 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2442
2443 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2444 return NULL;
2445
2446 vectype = get_vectype_for_scalar_type (type);
2447 if (vectype == NULL_TREE)
2448 return NULL;
2449
2450 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2451 return NULL;
2452
2453 if (itype == NULL_TREE)
2454 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2455 TYPE_UNSIGNED (type));
2456
2457 if (itype == NULL_TREE
2458 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2459 return NULL;
2460
2461 vecitype = get_vectype_for_scalar_type (itype);
2462 if (vecitype == NULL_TREE)
2463 return NULL;
2464
2465 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2466 return NULL;
2467
2468 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2469 {
2470 if ((TREE_CODE (then_clause) == INTEGER_CST
2471 && !int_fits_type_p (then_clause, itype))
2472 || (TREE_CODE (else_clause) == INTEGER_CST
2473 && !int_fits_type_p (else_clause, itype)))
2474 return NULL;
2475 }
2476
2477 def_stmt
2478 = gimple_build_assign_with_ops (COND_EXPR,
2479 vect_recog_temp_ssa_var (itype, NULL),
2480 unshare_expr (cond_expr),
2481 fold_convert (itype, then_clause),
2482 fold_convert (itype, else_clause));
2483 pattern_stmt
2484 = gimple_build_assign_with_ops (NOP_EXPR,
2485 vect_recog_temp_ssa_var (type, NULL),
2486 gimple_assign_lhs (def_stmt), NULL_TREE);
2487
2488 new_pattern_def_seq (stmt_vinfo, def_stmt);
2489 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2490 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2491 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2492 *type_in = vecitype;
2493 *type_out = vectype;
2494
2495 if (dump_enabled_p ())
2496 dump_printf_loc (MSG_NOTE, vect_location,
2497 "vect_recog_mixed_size_cond_pattern: detected:\n");
2498
2499 return pattern_stmt;
2500 }
2501
2502
2503 /* Helper function of vect_recog_bool_pattern. Called recursively; returns
2504 true if the definition of bool VAR can be rewritten as integer operations
2505 by adjust_bool_pattern. */
2505
2506 static bool
2507 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2508 {
2509 gimple def_stmt;
2510 enum vect_def_type dt;
2511 tree def, rhs1;
2512 enum tree_code rhs_code;
2513
2514 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2515 &dt))
2516 return false;
2517
2518 if (dt != vect_internal_def)
2519 return false;
2520
2521 if (!is_gimple_assign (def_stmt))
2522 return false;
2523
2524 if (!has_single_use (def))
2525 return false;
2526
2527 rhs1 = gimple_assign_rhs1 (def_stmt);
2528 rhs_code = gimple_assign_rhs_code (def_stmt);
2529 switch (rhs_code)
2530 {
2531 case SSA_NAME:
2532 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2533
2534 CASE_CONVERT:
2535 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2536 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2537 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2538 return false;
2539 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2540
2541 case BIT_NOT_EXPR:
2542 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2543
2544 case BIT_AND_EXPR:
2545 case BIT_IOR_EXPR:
2546 case BIT_XOR_EXPR:
2547 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2548 return false;
2549 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2550 bb_vinfo);
2551
2552 default:
2553 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2554 {
2555 tree vecitype, comp_vectype;
2556
2557 /* If the comparison can throw, then is_gimple_condexpr will be
2558 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2559 if (stmt_could_throw_p (def_stmt))
2560 return false;
2561
2562 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2563 if (comp_vectype == NULL_TREE)
2564 return false;
2565
2566 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2567 {
2568 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2569 tree itype
2570 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2571 vecitype = get_vectype_for_scalar_type (itype);
2572 if (vecitype == NULL_TREE)
2573 return false;
2574 }
2575 else
2576 vecitype = comp_vectype;
2577 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2578 }
2579 return false;
2580 }
2581 }
2582
2583
2584 /* Helper function of adjust_bool_pattern. Add a cast to TYPE of the result
2585 of a previous stmt (SSA_NAME_DEF_STMT of VAR) by moving its COND_EXPR from
2586 RELATED_STMT into PATTERN_DEF_SEQ and making the cast the new RELATED_STMT. */
2587
2588 static tree
2589 adjust_bool_pattern_cast (tree type, tree var)
2590 {
2591 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2592 gimple cast_stmt, pattern_stmt;
2593
2594 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2595 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2596 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2597 cast_stmt
2598 = gimple_build_assign_with_ops (NOP_EXPR,
2599 vect_recog_temp_ssa_var (type, NULL),
2600 gimple_assign_lhs (pattern_stmt),
2601 NULL_TREE);
2602 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2603 return gimple_assign_lhs (cast_stmt);
2604 }
2605
2606
2607 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2608 recursively. VAR is an SSA_NAME that should be transformed from bool
2609 to a wider integer type, OUT_TYPE is the desired final integer type of
2610 the whole pattern. TRUEVAL should be NULL unless we are optimizing a
2611 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2612 in the then_clause. STMTS is where statements with added pattern stmts
2613 should be pushed to. */
2614
2615 static tree
2616 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2617 vec<gimple> *stmts)
2618 {
2619 gimple stmt = SSA_NAME_DEF_STMT (var);
2620 enum tree_code rhs_code, def_rhs_code;
2621 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2622 location_t loc;
2623 gimple pattern_stmt, def_stmt;
2624
2625 rhs1 = gimple_assign_rhs1 (stmt);
2626 rhs2 = gimple_assign_rhs2 (stmt);
2627 rhs_code = gimple_assign_rhs_code (stmt);
2628 loc = gimple_location (stmt);
2629 switch (rhs_code)
2630 {
2631 case SSA_NAME:
2632 CASE_CONVERT:
2633 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2634 itype = TREE_TYPE (irhs1);
2635 pattern_stmt
2636 = gimple_build_assign_with_ops (SSA_NAME,
2637 vect_recog_temp_ssa_var (itype, NULL),
2638 irhs1, NULL_TREE);
2639 break;
2640
2641 case BIT_NOT_EXPR:
2642 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2643 itype = TREE_TYPE (irhs1);
2644 pattern_stmt
2645 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2646 vect_recog_temp_ssa_var (itype, NULL),
2647 irhs1, build_int_cst (itype, 1));
2648 break;
2649
2650 case BIT_AND_EXPR:
2651 /* Try to optimize x = y & (a < b ? 1 : 0); into
2652 x = (a < b ? y : 0);
2653
2654 E.g. for:
2655 bool a_b, b_b, c_b;
2656 TYPE d_T;
2657
2658 S1 a_b = x1 CMP1 y1;
2659 S2 b_b = x2 CMP2 y2;
2660 S3 c_b = a_b & b_b;
2661 S4 d_T = (TYPE) c_b;
2662
2663 we would normally emit:
2664
2665 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2666 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2667 S3' c_T = a_T & b_T;
2668 S4' d_T = c_T;
2669
2670 but we can save one stmt by using the
2671 result of one of the COND_EXPRs in the other COND_EXPR and leave
2672 the BIT_AND_EXPR stmt out:
2673
2674 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2675 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2676 S4' d_T = c_T;
2677
2678 At least when VEC_COND_EXPR is implemented using masks,
2679 cond ? 1 : 0 is as expensive as cond ? var : 0: in both cases it
2680 computes the comparison mask and ANDs it, in one case with an
2681 all-ones vector, in the other case with a vector register.
2682 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2683 often more expensive. */
2684 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2685 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2686 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2687 {
2688 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2689 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2690 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2691 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2692 {
2693 gimple tstmt;
2694 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2695 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2696 tstmt = stmts->pop ();
2697 gcc_assert (tstmt == def_stmt);
2698 stmts->quick_push (stmt);
2699 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2700 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2701 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2702 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2703 return irhs2;
2704 }
2705 else
2706 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2707 goto and_ior_xor;
2708 }
2709 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2710 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2711 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2712 {
2713 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2714 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2715 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2716 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2717 {
2718 gimple tstmt;
2719 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2720 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2721 tstmt = stmts->pop ();
2722 gcc_assert (tstmt == def_stmt);
2723 stmts->quick_push (stmt);
2724 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2725 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2726 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2727 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2728 return irhs1;
2729 }
2730 else
2731 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2732 goto and_ior_xor;
2733 }
2734 /* FALLTHRU */
2735 case BIT_IOR_EXPR:
2736 case BIT_XOR_EXPR:
2737 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2738 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2739 and_ior_xor:
2740 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2741 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2742 {
2743 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2744 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2745 int out_prec = TYPE_PRECISION (out_type);
2746 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2747 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2748 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2749 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2750 else
2751 {
2752 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2753 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2754 }
2755 }
2756 itype = TREE_TYPE (irhs1);
2757 pattern_stmt
2758 = gimple_build_assign_with_ops (rhs_code,
2759 vect_recog_temp_ssa_var (itype, NULL),
2760 irhs1, irhs2);
2761 break;
2762
2763 default:
2764 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2765 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2766 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2767 || (TYPE_PRECISION (TREE_TYPE (rhs1))
2768 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
2769 {
2770 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2771 itype
2772 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2773 }
2774 else
2775 itype = TREE_TYPE (rhs1);
2776 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2777 if (trueval == NULL_TREE)
2778 trueval = build_int_cst (itype, 1);
2779 else
2780 gcc_checking_assert (useless_type_conversion_p (itype,
2781 TREE_TYPE (trueval)));
2782 pattern_stmt
2783 = gimple_build_assign_with_ops (COND_EXPR,
2784 vect_recog_temp_ssa_var (itype, NULL),
2785 cond_expr, trueval,
2786 build_int_cst (itype, 0));
2787 break;
2788 }
2789
2790 stmts->safe_push (stmt);
2791 gimple_set_location (pattern_stmt, loc);
2792 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2793 return gimple_assign_lhs (pattern_stmt);
2794 }
2795
2796
2797 /* Function vect_recog_bool_pattern
2798
2799 Try to find pattern like following:
2800
2801 bool a_b, b_b, c_b, d_b, e_b;
2802 TYPE f_T;
2803 loop:
2804 S1 a_b = x1 CMP1 y1;
2805 S2 b_b = x2 CMP2 y2;
2806 S3 c_b = a_b & b_b;
2807 S4 d_b = x3 CMP3 y3;
2808 S5 e_b = c_b | d_b;
2809 S6 f_T = (TYPE) e_b;
2810
2811 where type 'TYPE' is an integral type.
2812
2813 Input:
2814
2815 * LAST_STMT: The stmt at the end of the sequence, from which the
2816 pattern search begins, i.e. the cast of a bool to
2817 an integer type.
2818
2819 Output:
2820
2821 * TYPE_IN: The type of the input arguments to the pattern.
2822
2823 * TYPE_OUT: The type of the output of this pattern.
2824
2825 * Return value: A new stmt that will be used to replace the pattern.
2826
2827 Assuming the size of TYPE is the same as the size of all comparisons
2828 (otherwise some casts would be added where needed), for the above
2829 sequence we create the following related pattern stmts:
2830 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2831 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2832 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2833 S5' e_T = c_T | d_T;
2834 S6' f_T = e_T;
2835
2836 Instead of the above S3' we could emit:
2837 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2838 S3' c_T = a_T | b_T;
2839 but the above is more efficient. */
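
/* For example (a hypothetical source fragment; the names are made up),
   with 'int *x', 'int *y' and 'int *a', the loop body

     bool b = x[i] < y[i];
     a[i] = b;

   is rewritten as a[i] = x[i] < y[i] ? 1 : 0, a COND_EXPR whose
   comparison and result have the same element width and can therefore
   be vectorized as a single VEC_COND_EXPR.  */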
2840
2841 static gimple
2842 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
2843 tree *type_out)
2844 {
2845 gimple last_stmt = stmts->pop ();
2846 enum tree_code rhs_code;
2847 tree var, lhs, rhs, vectype;
2848 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2849 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2850 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2851 gimple pattern_stmt;
2852
2853 if (!is_gimple_assign (last_stmt))
2854 return NULL;
2855
2856 var = gimple_assign_rhs1 (last_stmt);
2857 lhs = gimple_assign_lhs (last_stmt);
2858
2859 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2860 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2861 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2862 return NULL;
2863
2864 rhs_code = gimple_assign_rhs_code (last_stmt);
2865 if (CONVERT_EXPR_CODE_P (rhs_code))
2866 {
2867 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2868 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2869 return NULL;
2870 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2871 if (vectype == NULL_TREE)
2872 return NULL;
2873
2874 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2875 return NULL;
2876
2877 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2878 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2879 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2880 pattern_stmt
2881 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2882 else
2883 pattern_stmt
2884 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2885 *type_out = vectype;
2886 *type_in = vectype;
2887 stmts->safe_push (last_stmt);
2888 if (dump_enabled_p ())
2889 dump_printf_loc (MSG_NOTE, vect_location,
2890 "vect_recog_bool_pattern: detected:\n");
2891
2892 return pattern_stmt;
2893 }
2894 else if (rhs_code == SSA_NAME
2895 && STMT_VINFO_DATA_REF (stmt_vinfo))
2896 {
2897 stmt_vec_info pattern_stmt_info;
2898 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2899 gcc_assert (vectype != NULL_TREE);
2900 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2901 return NULL;
2902 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2903 return NULL;
2904
2905 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2906 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2907 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2908 {
2909 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2910 gimple cast_stmt
2911 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2912 new_pattern_def_seq (stmt_vinfo, cast_stmt);
2913 rhs = rhs2;
2914 }
2915 pattern_stmt
2916 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2917 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2918 bb_vinfo);
2919 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2920 STMT_VINFO_DATA_REF (pattern_stmt_info)
2921 = STMT_VINFO_DATA_REF (stmt_vinfo);
2922 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2923 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2924 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2925 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2926 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2927 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2928 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2929 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2930 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2931 *type_out = vectype;
2932 *type_in = vectype;
2933 stmts->safe_push (last_stmt);
2934 if (dump_enabled_p ())
2935 dump_printf_loc (MSG_NOTE, vect_location,
2936 "vect_recog_bool_pattern: detected:\n");
2937 return pattern_stmt;
2938 }
2939 else
2940 return NULL;
2941 }
2942
2943
2944 /* Mark statements that are involved in a pattern. */
2945
2946 static inline void
2947 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2948 tree pattern_vectype)
2949 {
2950 stmt_vec_info pattern_stmt_info, def_stmt_info;
2951 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2952 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2953 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
2954 gimple def_stmt;
2955
2956 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2957 if (pattern_stmt_info == NULL)
2958 {
2959 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2960 bb_vinfo);
2961 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2962 }
2963 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2964
2965 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2966 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2967 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2968 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2969 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2970 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2971 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2972 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2973 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2974 {
2975 gimple_stmt_iterator si;
2976 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2977 !gsi_end_p (si); gsi_next (&si))
2978 {
2979 def_stmt = gsi_stmt (si);
2980 def_stmt_info = vinfo_for_stmt (def_stmt);
2981 if (def_stmt_info == NULL)
2982 {
2983 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
2984 bb_vinfo);
2985 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2986 }
2987 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2988 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2989 STMT_VINFO_DEF_TYPE (def_stmt_info)
2990 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2991 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2992 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2993 }
2994 }
2995 }
2996
2997 /* Function vect_pattern_recog_1
2998
2999 Input:
3000 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3001 computation pattern.
3002 STMT: A stmt from which the pattern search should start.
3003
3004 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3005 expression that computes the same functionality and can be used to
3006 replace the sequence of stmts that are involved in the pattern.
3007
3008 Output:
3009 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3010 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3011 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3012 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3013 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3014 the available target pattern.
3015
3016 This function also does some bookkeeping, as explained in the documentation
3017 for vect_pattern_recog. */
3018
3019 static void
3020 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3021 gimple_stmt_iterator si,
3022 vec<gimple> *stmts_to_replace)
3023 {
3024 gimple stmt = gsi_stmt (si), pattern_stmt;
3025 stmt_vec_info stmt_info;
3026 loop_vec_info loop_vinfo;
3027 tree pattern_vectype;
3028 tree type_in, type_out;
3029 enum tree_code code;
3030 int i;
3031 gimple next;
3032
3033 stmts_to_replace->truncate (0);
3034 stmts_to_replace->quick_push (stmt);
3035 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
3036 if (!pattern_stmt)
3037 return;
3038
3039 stmt = stmts_to_replace->last ();
3040 stmt_info = vinfo_for_stmt (stmt);
3041 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3042
3043 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3044 {
3045 /* No need to check target support (already checked by the pattern
3046 recognition function). */
3047 pattern_vectype = type_out ? type_out : type_in;
3048 }
3049 else
3050 {
3051 enum machine_mode vec_mode;
3052 enum insn_code icode;
3053 optab optab;
3054
3055 /* Check target support */
3056 type_in = get_vectype_for_scalar_type (type_in);
3057 if (!type_in)
3058 return;
3059 if (type_out)
3060 type_out = get_vectype_for_scalar_type (type_out);
3061 else
3062 type_out = type_in;
3063 if (!type_out)
3064 return;
3065 pattern_vectype = type_out;
3066
3067 if (is_gimple_assign (pattern_stmt))
3068 code = gimple_assign_rhs_code (pattern_stmt);
3069 else
3070 {
3071 gcc_assert (is_gimple_call (pattern_stmt));
3072 code = CALL_EXPR;
3073 }
3074
3075 optab = optab_for_tree_code (code, type_in, optab_default);
3076 vec_mode = TYPE_MODE (type_in);
3077 if (!optab
3078 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3079 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3080 return;
3081 }
3082
3083 /* Found a vectorizable pattern. */
3084 if (dump_enabled_p ())
3085 {
3086 dump_printf_loc (MSG_NOTE, vect_location,
3087 "pattern recognized: ");
3088 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3089 dump_printf (MSG_NOTE, "\n");
3090 }
3091
3092 /* Mark the stmts that are involved in the pattern. */
3093 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3094
3095 /* Patterns cannot be vectorized using SLP, because they change the order of
3096 computation. */
3097 if (loop_vinfo)
3098 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3099 if (next == stmt)
3100 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3101
3102 /* It is possible that additional pattern stmts are created and inserted in
3103 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3104 relevant statements. */
3105 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3106 && (unsigned) i < (stmts_to_replace->length () - 1);
3107 i++)
3108 {
3109 stmt_info = vinfo_for_stmt (stmt);
3110 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3111 if (dump_enabled_p ())
3112 {
3113 dump_printf_loc (MSG_NOTE, vect_location,
3114 "additional pattern stmt: ");
3115 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3116 dump_printf (MSG_NOTE, "\n");
3117 }
3118
3119 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3120 }
3121 }
3122
3123
3124 /* Function vect_pattern_recog
3125
3126 Input:
3127 LOOP_VINFO - the loop_vec_info struct of a loop in which we want to look for
3128 computation idioms.
3129
3130 Output - for each computation idiom that is detected we create a new stmt
3131 that provides the same functionality and that can be vectorized. We
3132 also record some information in the stmt_vec_info of the relevant
3133 stmts, as explained below:
3134
3135 At the entry to this function we have the following stmts, with the
3136 following initial value in the STMT_VINFO fields:
3137
3138 stmt in_pattern_p related_stmt vec_stmt
3139 S1: a_i = .... - - -
3140 S2: a_2 = ..use(a_i).. - - -
3141 S3: a_1 = ..use(a_2).. - - -
3142 S4: a_0 = ..use(a_1).. - - -
3143 S5: ... = ..use(a_0).. - - -
3144
3145 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3146 represented by a single stmt. We then:
3147 - create a new stmt S6 equivalent to the pattern (the stmt is not
3148 inserted into the code)
3149 - fill in the STMT_VINFO fields as follows:
3150
3151 in_pattern_p related_stmt vec_stmt
3152 S1: a_i = .... - - -
3153 S2: a_2 = ..use(a_i).. - - -
3154 S3: a_1 = ..use(a_2).. - - -
3155 S4: a_0 = ..use(a_1).. true S6 -
3156 '---> S6: a_new = .... - S4 -
3157 S5: ... = ..use(a_0).. - - -
3158
3159 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3160 to each other through the RELATED_STMT field).
3161
3162 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3163 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3164 remain irrelevant unless used by stmts other than S4.
3165
3166 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3167 (because they are marked as irrelevant). It will vectorize S6, and record
3168 a pointer to the new vector stmt VS6 from S6 (as usual).
3169 S4 will be skipped, and S5 will be vectorized as usual:
3170
3171 in_pattern_p related_stmt vec_stmt
3172 S1: a_i = .... - - -
3173 S2: a_2 = ..use(a_i).. - - -
3174 S3: a_1 = ..use(a_2).. - - -
3175 > VS6: va_new = .... - - -
3176 S4: a_0 = ..use(a_1).. true S6 VS6
3177 '---> S6: a_new = .... - S4 VS6
3178 > VS5: ... = ..vuse(va_new).. - - -
3179 S5: ... = ..use(a_0).. - - -
3180
3181 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3182 elsewhere), and we'll end up with:
3183
3184 VS6: va_new = ....
3185 VS5: ... = ..vuse(va_new)..
3186
3187 In case of more than one pattern statement, e.g. widen-mult with an
3188 intermediate type:
3189
3190 S1 a_t = ;
3191 S2 a_T = (TYPE) a_t;
3192 '--> S3: a_it = (interm_type) a_t;
3193 S4 prod_T = a_T * CONST;
3194 '--> S5: prod_T' = a_it w* CONST;
3195
3196 there may be other users of a_T outside the pattern. In that case S2 will
3197 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3198 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3199 be recorded in S3. */
3200
3201 void
3202 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3203 {
3204 struct loop *loop;
3205 basic_block *bbs;
3206 unsigned int nbbs;
3207 gimple_stmt_iterator si;
3208 unsigned int i, j;
3209 vect_recog_func_ptr vect_recog_func;
3210 stack_vec<gimple, 1> stmts_to_replace;
3211 gimple stmt;
3212
3213 if (dump_enabled_p ())
3214 dump_printf_loc (MSG_NOTE, vect_location,
3215 "=== vect_pattern_recog ===\n");
3216
3217 if (loop_vinfo)
3218 {
3219 loop = LOOP_VINFO_LOOP (loop_vinfo);
3220 bbs = LOOP_VINFO_BBS (loop_vinfo);
3221 nbbs = loop->num_nodes;
3222 }
3223 else
3224 {
3225 bbs = &BB_VINFO_BB (bb_vinfo);
3226 nbbs = 1;
3227 }
3228
3229 /* Scan through the loop stmts, applying the pattern recognition
3230 functions starting at each stmt visited: */
3231 for (i = 0; i < nbbs; i++)
3232 {
3233 basic_block bb = bbs[i];
3234 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3235 {
3236 if (bb_vinfo && (stmt = gsi_stmt (si))
3237 && vinfo_for_stmt (stmt)
3238 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3239 continue;
3240
3241 /* Scan over all generic vect_recog_xxx_pattern functions. */
3242 for (j = 0; j < NUM_PATTERNS; j++)
3243 {
3244 vect_recog_func = vect_vect_recog_func_ptrs[j];
3245 vect_pattern_recog_1 (vect_recog_func, si,
3246 &stmts_to_replace);
3247 }
3248 }
3249 }
3250 }