/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2013 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "tree-ssanames.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"              /* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "dumpfile.h"

/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
                                            tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
                                             tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
                                           tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
                                                tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
                                              tree *, tree *);
static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
                                                      tree *, tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *,
                                         tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
                                                  tree *, tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
  vect_recog_widen_mult_pattern,
  vect_recog_widen_sum_pattern,
  vect_recog_dot_prod_pattern,
  vect_recog_pow_pattern,
  vect_recog_widen_shift_pattern,
  vect_recog_over_widening_pattern,
  vect_recog_rotate_pattern,
  vect_recog_vector_vector_shift_pattern,
  vect_recog_divmod_pattern,
  vect_recog_mixed_size_cond_pattern,
  vect_recog_bool_pattern};

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
                                      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
        return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
          || gimple_code (stmt2) == GIMPLE_PHI)
        return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}

/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}

/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion or demotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of
   NAME.  If CHECK_SIGN is TRUE, check that either both types are signed
   or both are unsigned.  */

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
                   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
                           &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
    *promotion = false;
  else
    return false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
                           bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a
   reduction computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

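/* Illustration (an addition to this write-up, not part of the original
   source): a self-contained C function whose inner loop this pattern is
   meant to match, assuming 'short' inputs ('type') and an 'int'
   accumulator ('TYPE1'/'TYPE2'):

     int
     dot_prod (const short *x, const short *y, int n)
     {
       int sum = 0;                # sum = init
       for (int i = 0; i < n; i++)
         sum += x[i] * y[i];       # S3-S5 + S7: widen, multiply, add
       return sum;
     }

   On targets with a dot-product instruction, the loop body becomes a
   single DOT_PROD_EXPR per vector chunk instead of separate widening
   multiplies and additions.  */
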
static gimple
vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
                             tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
          || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
                             &promotion)
          && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr.  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
                              &promotion)
          || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
                              &promotion)
          || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
                                               oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}


/* Handle widening operation by a constant.  At the moment we support
   MULT_EXPR and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */

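/* Illustration (added here, not in the original source), assuming an 8-bit
   'a_t' widened to a 32-bit 'TYPE': in

     a_T = (TYPE) a_t;
     res_T = a_T << 12;

   the shift amount 12 does not fit the 8-bit half type, but TYPE is 4 times
   bigger than the half type and 12 fits a 16-bit intermediate type, so the
   conversion is rewritten as a_it = (interm_type) a_t and the widening is
   done from 16 to 32 bits instead.  */
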
static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
                               tree const_oprnd, tree *oprnd,
                               vec<gimple> *stmts, tree type,
                               tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;
  gimple new_stmt;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
           && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
              != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      stmts->safe_push (def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      new_oprnd = make_ssa_name (new_type, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
                                               NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      stmts->safe_push (def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}


/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type a_t, b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t, b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
*/

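/* Illustration (an addition, not from the original source): a function
   whose loop body exhibits the basic widen-mult pattern, with 'short' as
   'type' and 'int' as 'TYPE':

     void
     widen_mult (const short *a, const short *b, int *out, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = (int) a[i] * (int) b[i];   # S3, S4, S5
     }

   The two conversions and the multiplication collapse into a single
   WIDEN_MULT_EXPR when the target supports a widening multiply.  */
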
static gimple
vect_recog_widen_mult_pattern (vec<gimple> *stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
                              &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
                                            &oprnd0, stmts, type,
                                            &half_type0, def_stmt0))
        {
          half_type1 = half_type0;
          oprnd1 = fold_convert (half_type1, oprnd1);
        }
      else
        return NULL;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
          || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy_code, &dummy_code,
                                          &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
                                               oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}


/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)
*/

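/* Illustration (added, not part of the original source): calls that this
   pattern rewrites, assuming the program includes <math.h>:

     void
     square_and_sqrt (float *x, float *y, int n)
     {
       for (int i = 0; i < n; i++)
         {
           x[i] = powf (x[i], 2.0f);   # becomes x[i] * x[i]
           y[i] = powf (y[i], 0.5f);   # becomes sqrtf (y[i])
         }
     }

   so the loops can vectorize without needing a vectorized pow.  */
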
static gimple
vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
                        tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
          && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_low_cst (exp, 0) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
        {
          gimple stmt = gimple_build_call (newfn, 1, base);
          if (vectorizable_function (stmt, *type_in, *type_in)
              != NULL_TREE)
            {
              var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
              gimple_call_set_lhs (stmt, var);
              return stmt;
            }
        }
    }

  return NULL;
}


/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e., we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

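/* Illustration (an addition to this write-up, not in the original source):
   the classic widening summation, with 'short' elements accumulated into
   an 'int':

     int
     widen_sum (const short *p, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
         sum += p[i];              # S2 + S3: widen, then add
       return sum;
     }

   The conversion plus addition becomes a single WIDEN_SUM_EXPR.  */
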
static gimple
vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
                              tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
                          &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
                                               oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}


/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In
         the latter case DEF is used to replace that operand.  (It is defined
         by a pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if
         not NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be
         created in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the
         second one is the operation itself.  We return the type promotion
         statement in NEW_DEF_STMT and further store it in
         STMT_VINFO_PATTERN_DEF_SEQ of the second pattern statement.  */

static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  vec<gimple> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
                              &promotion)
          || !promotion
          || !vect_same_loop_or_bb_p (stmt, def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
        {
          /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
          if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
            return false;

          interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
          if (!int_fits_type_p (const_oprnd, interm_type))
            return false;
        }

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
        return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
         (e.g., if the original value was char, the shift amount is at most 8
         if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
        return false;

      interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
        return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
        break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
        return false;

      interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
        return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We
           store this statement in NEW_DEF_STMT, and it is later put in
           STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

              stmts->safe_push (def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              new_oprnd = make_ssa_name (interm_type, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              stmts->safe_push (def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          new_oprnd = make_ssa_name (interm_type, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}


/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', and C0 and
   C1 are constants.  Check if S3 and S4 can be done on a smaller type than
   'TYPE'; it can either be 'type' or some intermediate type.  For now, we
   expect S5 to be a type demotion operation.  We also check that S3 and S4
   have only one use.  */

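/* Illustration (added, not in the original source): because of C integer
   promotion, the operations below are written on 'int' even though every
   value fits in a byte:

     void
     over_widened (const unsigned char *a, unsigned char *out, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = (unsigned char) ((a[i] << 1) | 3);   # S2-S5
     }

   This pattern detects that the shift and the OR can be performed in a
   narrower type, so more elements fit into each vector.  */
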
static gimple
vect_recog_over_widening_pattern (vec<gimple> *stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = stmts->pop ();
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
        = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
                                        op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "created pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
          || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
        return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          new_oprnd = make_ssa_name (use_type, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            stmts->safe_push (prev_stmt);
        }
      else
        {
          if (prev_stmt)
            STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
              = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}

/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect cases where the shift result is immediately converted
   to another type 'result_type' that is no larger in size than 'TYPE'.
   In those cases we perform a widen-shift that directly results in
   'result_type', to avoid a possible over-widening situation:

   type a_t;
   TYPE a_T, res_T;
   result_type res_result;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;
   S4 res_result = (result_type) res_T;
       '--> res_result' = a_t w<< CONST;

   And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
   create an additional pattern stmt for S2 to create a variable of an
   intermediate type, and perform widen-shift on the intermediate type:

   type a_t;
   interm_type a_it;
   TYPE a_T, res_T, res_T';

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
       '--> a_it = (interm_type) a_t;
   S3 res_T = a_T << CONST;
       '--> res_T' = a_it <<* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.
   In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
   in STMTS.  When an intermediate type is used and a pattern statement is
   created for S2, we also put S2 here (before S3).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
   WIDEN_LSHIFT_EXPR <a_t, CONST>.  */

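/* Illustration (an addition, not from the original source), with 'short'
   as 'type' and 'int' as 'TYPE':

     void
     widen_shift (const short *a, int *out, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = (int) a[i] << 7;      # S2 + S3
     }

   The conversion plus shift is replaced by one WIDEN_LSHIFT_EXPR on
   targets that provide a widening shift instruction.  */
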
static gimple
vect_recog_widen_shift_pattern (vec<gimple> *stmts,
                                tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: it has to be positive.  We check that it fits the
     type in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
          && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
        {
          last_stmt = use_stmt;
          type = use_type;
        }
    }

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
                                      &oprnd0, stmts,
                                      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy_code, &dummy_code,
                                          &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Detect a rotate pattern that wouldn't be otherwise vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins,
   i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
   with a sequence:

   S1 d_t = -c_t;
   S2 e_t = d_t & (B - 1);
   S3 f_t = b_t << c_t;
   S4 g_t = b_t >> e_t;
   S0 a_t = f_t | g_t;

   where B is element bitsize of type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the rotate
   S0 stmt.  */

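/* Illustration (added to this write-up): with B = 32 and an unsigned
   32-bit b_t, the replacement sequence computes

     d = -c;
     e = d & 31;              # == (32 - c) mod 32
     a = (b << c) | (b >> e); # == b rotated left by c

   The masking keeps the right-shift amount in [0, 31] even when c is 0
   (then e is 0 and the OR simply reproduces b), so no shift by the full
   bit-width is ever generated.  */
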
static gimple
vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
                           &def, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (bb_vinfo != NULL || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
          && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
        return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (bb_vinfo == NULL && dt == vect_internal_def)
        return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
          || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
          || !optab2
          || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
        return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
        {
          basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
          if (bb == NULL
              || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
            ext_def = NULL;
        }
    }

  def = NULL_TREE;
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
          && TYPE_PRECISION (TREE_TYPE (rhs1))
             == TYPE_PRECISION (type))
        def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
                                               NULL_TREE);
      if (ext_def)
        {
          basic_block new_bb
            = gsi_insert_on_edge_immediate (ext_def, def_stmt);
          gcc_assert (!new_bb);
        }
      else
        append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!host_integerp (def, 1)
          || (unsigned HOST_WIDE_INT) tree_low_cst (def, 1)
             >= GET_MODE_PRECISION (TYPE_MODE (type))
          || integer_zerop (def))
        return NULL;
      def2 = build_int_cst (stype,
                            GET_MODE_PRECISION (TYPE_MODE (type))
                            - tree_low_cst (def, 1));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (stype);
      stmt_vec_info def_stmt_vinfo;

      if (vecstype == NULL_TREE)
        return NULL;
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
                                               NULL_TREE);
      if (ext_def)
        {
          basic_block new_bb
            = gsi_insert_on_edge_immediate (ext_def, def_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
          set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
          STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
          append_pattern_def_seq (stmt_vinfo, def_stmt);
        }

      def2 = vect_recog_temp_ssa_var (stype, NULL);
      tree mask
        = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
      def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
                                               gimple_assign_lhs (def_stmt),
                                               mask);
      if (ext_def)
        {
          basic_block new_bb
            = gsi_insert_on_edge_immediate (ext_def, def_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
          set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
          STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
          append_pattern_def_seq (stmt_vinfo, def_stmt);
        }
    }

  var1 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
                                           ? LSHIFT_EXPR : RSHIFT_EXPR,
                                           var1, oprnd0, def);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  var2 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
                                           ? RSHIFT_EXPR : LSHIFT_EXPR,
                                           var2, oprnd0, def2);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_rotate_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

   where type 'TYPE' is a type with a different size than 'type',
   and op is <<, >> or rotate.

   Also detect cases:

   type a_t;
   TYPE b_T, c_T, res_T;

   S0 c_T = ;
   S1 a_t = (type) c_T;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins,
   i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
   with a shift/rotate which has the same type on both operands, in the
   second case just b_T op c_T, in the first case with an added cast
   from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the shift/rotate
   S3 stmt.  */

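/* Illustration (an addition, not part of the original source): a source
   loop where, depending on earlier narrowing optimizations, the GIMPLE
   shift amount can keep a narrower type than the shifted value, which
   blocks a straightforward vector/vector shift:

     void
     vv_shift (int *b, const unsigned char *s, int n)
     {
       for (int i = 0; i < n; i++)
         b[i] = b[i] << s[i];           # S3, with a_t = s[i]
     }

   The pattern inserts a cast of the shift amount to the type of b[i] so
   that both operands of the vector shift have the same type.  */
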
1760 static gimple
1761 vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
1762 tree *type_in, tree *type_out)
1763 {
1764 gimple last_stmt = stmts->pop ();
1765 tree oprnd0, oprnd1, lhs, var;
1766 gimple pattern_stmt, def_stmt;
1767 enum tree_code rhs_code;
1768 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1769 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1770 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1771 enum vect_def_type dt;
1772 tree def;
1773
1774 if (!is_gimple_assign (last_stmt))
1775 return NULL;
1776
1777 rhs_code = gimple_assign_rhs_code (last_stmt);
1778 switch (rhs_code)
1779 {
1780 case LSHIFT_EXPR:
1781 case RSHIFT_EXPR:
1782 case LROTATE_EXPR:
1783 case RROTATE_EXPR:
1784 break;
1785 default:
1786 return NULL;
1787 }
1788
1789 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1790 return NULL;
1791
1792 lhs = gimple_assign_lhs (last_stmt);
1793 oprnd0 = gimple_assign_rhs1 (last_stmt);
1794 oprnd1 = gimple_assign_rhs2 (last_stmt);
1795 if (TREE_CODE (oprnd0) != SSA_NAME
1796 || TREE_CODE (oprnd1) != SSA_NAME
1797 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
1798 || TYPE_PRECISION (TREE_TYPE (oprnd1))
1799 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
1800 || TYPE_PRECISION (TREE_TYPE (lhs))
1801 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
1802 return NULL;
1803
1804 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1805 &def, &dt))
1806 return NULL;
1807
1808 if (dt != vect_internal_def)
1809 return NULL;
1810
1811 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
1812 *type_out = *type_in;
1813 if (*type_in == NULL_TREE)
1814 return NULL;
1815
1816 def = NULL_TREE;
1817 if (gimple_assign_cast_p (def_stmt))
1818 {
1819 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1820 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
1821 && TYPE_PRECISION (TREE_TYPE (rhs1))
1822 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
1823 def = rhs1;
1824 }
1825
1826 if (def == NULL_TREE)
1827 {
1828 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1829 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1830 NULL_TREE);
1831 new_pattern_def_seq (stmt_vinfo, def_stmt);
1832 }
1833
1834 /* Pattern detected. */
1835 if (dump_enabled_p ())
1836 dump_printf_loc (MSG_NOTE, vect_location,
1837 "vect_recog_vector_vector_shift_pattern: detected:\n");
1838
1839 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1840 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1841 pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
1842
1843 if (dump_enabled_p ())
1844 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1845
1846 stmts->safe_push (last_stmt);
1847 return pattern_stmt;
1848 }
1849
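/* A minimal scalar example of this pattern (illustrative only, not
   part of GCC):

     unsigned char amount[N];
     unsigned int val[N], res[N];
     ...
     res[i] = val[i] << amount[i];

   The shift amount is narrower than the shifted value, so the two
   operands would otherwise get different vector types.  The pattern
   adds def = (unsigned int) amount[i] to the pattern def seq (unless
   a suitable cast already feeds the shift) and replaces the shift by
   res = val << def, with the same vector type on both operands.  */
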
1850 /* Detect a signed division by a constant that wouldn't be
1851 otherwise vectorized:
1852
1853 type a_t, b_t;
1854
1855 S1 a_t = b_t / N;
1856
1857 where type 'type' is an integral type and N is a constant.
1858
1859 Similarly handle modulo by a constant:
1860
1861 S4 a_t = b_t % N;
1862
1863 Input/Output:
1864
1865 * STMTS: Contains a stmt from which the pattern search begins,
1866 i.e. the division stmt. S1 is replaced, if N is a power
1867 of two constant and type is signed, by:
1868 S3 y_t = b_t < 0 ? N - 1 : 0;
1869 S2 x_t = b_t + y_t;
1870 S1' a_t = x_t >> log2 (N);
1871
1872 S4 is replaced, if N is a power of two constant and
1873 type is signed, by (where *_T temporaries have unsigned type):
1874 S9 y_T = b_t < 0 ? -1U : 0U;
1875 S8 z_T = y_T >> (sizeof (type) * CHAR_BIT - log2 (N));
1876 S7 z_t = (type) z_T;
1877 S6 w_t = b_t + z_t;
1878 S5 x_t = w_t & (N - 1);
1879 S4' a_t = x_t - z_t;
1880
1881 Output:
1882
1883 * TYPE_IN: The type of the input arguments to the pattern.
1884
1885 * TYPE_OUT: The type of the output of this pattern.
1886
1887 * Return value: A new stmt that will be used to replace the division
1888 S1 or modulo S4 stmt. */
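
/* For instance (illustrative only), for signed int b and N == 4 the
   division case above becomes

     y = b < 0 ? 3 : 0;
     x = b + y;
     a = x >> 2;

   which rounds toward zero as C requires; the modulo case applies
   the same correction using the unsigned shift of the sign mask.  */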
1889
1890 static gimple
1891 vect_recog_divmod_pattern (vec<gimple> *stmts,
1892 tree *type_in, tree *type_out)
1893 {
1894 gimple last_stmt = stmts->pop ();
1895 tree oprnd0, oprnd1, vectype, itype, cond;
1896 gimple pattern_stmt, def_stmt;
1897 enum tree_code rhs_code;
1898 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1899 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1900 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1901 optab optab;
1902 tree q;
1903 int dummy_int, prec;
1904 stmt_vec_info def_stmt_vinfo;
1905
1906 if (!is_gimple_assign (last_stmt))
1907 return NULL;
1908
1909 rhs_code = gimple_assign_rhs_code (last_stmt);
1910 switch (rhs_code)
1911 {
1912 case TRUNC_DIV_EXPR:
1913 case TRUNC_MOD_EXPR:
1914 break;
1915 default:
1916 return NULL;
1917 }
1918
1919 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1920 return NULL;
1921
1922 oprnd0 = gimple_assign_rhs1 (last_stmt);
1923 oprnd1 = gimple_assign_rhs2 (last_stmt);
1924 itype = TREE_TYPE (oprnd0);
1925 if (TREE_CODE (oprnd0) != SSA_NAME
1926 || TREE_CODE (oprnd1) != INTEGER_CST
1927 || TREE_CODE (itype) != INTEGER_TYPE
1928 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
1929 return NULL;
1930
1931 vectype = get_vectype_for_scalar_type (itype);
1932 if (vectype == NULL_TREE)
1933 return NULL;
1934
1935 /* If the target can handle vectorized division or modulo natively,
1936 don't attempt to optimize this. */
1937 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
1938 if (optab != unknown_optab)
1939 {
1940 enum machine_mode vec_mode = TYPE_MODE (vectype);
1941 int icode = (int) optab_handler (optab, vec_mode);
1942 if (icode != CODE_FOR_nothing)
1943 return NULL;
1944 }
1945
1946 prec = TYPE_PRECISION (itype);
1947 if (integer_pow2p (oprnd1))
1948 {
1949 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
1950 return NULL;
1951
1952 /* Pattern detected. */
1953 if (dump_enabled_p ())
1954 dump_printf_loc (MSG_NOTE, vect_location,
1955 "vect_recog_divmod_pattern: detected:\n");
1956
1957 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
1958 build_int_cst (itype, 0));
1959 if (rhs_code == TRUNC_DIV_EXPR)
1960 {
1961 tree var = vect_recog_temp_ssa_var (itype, NULL);
1962 tree shift;
1963 def_stmt
1964 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
1965 fold_build2 (MINUS_EXPR, itype,
1966 oprnd1,
1967 build_int_cst (itype,
1968 1)),
1969 build_int_cst (itype, 0));
1970 new_pattern_def_seq (stmt_vinfo, def_stmt);
1971 var = vect_recog_temp_ssa_var (itype, NULL);
1972 def_stmt
1973 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
1974 gimple_assign_lhs (def_stmt));
1975 append_pattern_def_seq (stmt_vinfo, def_stmt);
1976
1977 shift = build_int_cst (itype, tree_log2 (oprnd1));
1978 pattern_stmt
1979 = gimple_build_assign_with_ops (RSHIFT_EXPR,
1980 vect_recog_temp_ssa_var (itype,
1981 NULL),
1982 var, shift);
1983 }
1984 else
1985 {
1986 tree signmask;
1987 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1988 if (compare_tree_int (oprnd1, 2) == 0)
1989 {
1990 signmask = vect_recog_temp_ssa_var (itype, NULL);
1991 def_stmt
1992 = gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
1993 build_int_cst (itype, 1),
1994 build_int_cst (itype, 0));
1995 append_pattern_def_seq (stmt_vinfo, def_stmt);
1996 }
1997 else
1998 {
1999 tree utype
2000 = build_nonstandard_integer_type (prec, 1);
2001 tree vecutype = get_vectype_for_scalar_type (utype);
2002 tree shift
2003 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2004 - tree_log2 (oprnd1));
2005 tree var = vect_recog_temp_ssa_var (utype, NULL);
2006
2007 def_stmt
2008 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2009 build_int_cst (utype, -1),
2010 build_int_cst (utype, 0));
2011 def_stmt_vinfo
2012 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2013 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2014 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2015 append_pattern_def_seq (stmt_vinfo, def_stmt);
2016 var = vect_recog_temp_ssa_var (utype, NULL);
2017 def_stmt
2018 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
2019 gimple_assign_lhs (def_stmt),
2020 shift);
2021 def_stmt_vinfo
2022 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2023 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2024 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2025 append_pattern_def_seq (stmt_vinfo, def_stmt);
2026 signmask = vect_recog_temp_ssa_var (itype, NULL);
2027 def_stmt
2028 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
2029 NULL_TREE);
2030 append_pattern_def_seq (stmt_vinfo, def_stmt);
2031 }
2032 def_stmt
2033 = gimple_build_assign_with_ops (PLUS_EXPR,
2034 vect_recog_temp_ssa_var (itype,
2035 NULL),
2036 oprnd0, signmask);
2037 append_pattern_def_seq (stmt_vinfo, def_stmt);
2038 def_stmt
2039 = gimple_build_assign_with_ops (BIT_AND_EXPR,
2040 vect_recog_temp_ssa_var (itype,
2041 NULL),
2042 gimple_assign_lhs (def_stmt),
2043 fold_build2 (MINUS_EXPR, itype,
2044 oprnd1,
2045 build_int_cst (itype,
2046 1)));
2047 append_pattern_def_seq (stmt_vinfo, def_stmt);
2048
2049 pattern_stmt
2050 = gimple_build_assign_with_ops (MINUS_EXPR,
2051 vect_recog_temp_ssa_var (itype,
2052 NULL),
2053 gimple_assign_lhs (def_stmt),
2054 signmask);
2055 }
2056
2057 if (dump_enabled_p ())
2058 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2059 0);
2060
2061 stmts->safe_push (last_stmt);
2062
2063 *type_in = vectype;
2064 *type_out = vectype;
2065 return pattern_stmt;
2066 }
2067
2068 if (prec > HOST_BITS_PER_WIDE_INT
2069 || integer_zerop (oprnd1))
2070 return NULL;
2071
2072 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2073 return NULL;
2074
2075 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2076
2077 if (TYPE_UNSIGNED (itype))
2078 {
2079 unsigned HOST_WIDE_INT mh, ml;
2080 int pre_shift, post_shift;
2081 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2082 & GET_MODE_MASK (TYPE_MODE (itype)));
2083 tree t1, t2, t3, t4;
2084
2085 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2086 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2087 return NULL;
2088
2089 /* Find a suitable multiplier and right shift count
2090 instead of dividing by D. */
2091 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2092
2093 /* If the suggested multiplier is more than PREC bits, we can do better
2094 for even divisors, using an initial right shift. */
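/* For instance (illustrative), for d == 12 this uses pre_shift == 2
   and then chooses a multiplier for d >> 2 == 3 that fits in
   PREC bits. */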
2095 if (mh != 0 && (d & 1) == 0)
2096 {
2097 pre_shift = floor_log2 (d & -d);
2098 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2099 &ml, &post_shift, &dummy_int);
2100 gcc_assert (!mh);
2101 }
2102 else
2103 pre_shift = 0;
2104
2105 if (mh != 0)
2106 {
2107 if (post_shift - 1 >= prec)
2108 return NULL;
2109
2110 /* t1 = oprnd0 h* ml;
2111 t2 = oprnd0 - t1;
2112 t3 = t2 >> 1;
2113 t4 = t1 + t3;
2114 q = t4 >> (post_shift - 1); */
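/* E.g. for d == 7 and prec == 32 (illustrative), choose_multiplier
   yields ml == 0x24924925 and post_shift == 3, i.e. the classic
   q = (t1 + ((oprnd0 - t1) >> 1)) >> 2 divide-by-7 sequence. */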
2115 t1 = vect_recog_temp_ssa_var (itype, NULL);
2116 def_stmt
2117 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2118 build_int_cst (itype, ml));
2119 append_pattern_def_seq (stmt_vinfo, def_stmt);
2120
2121 t2 = vect_recog_temp_ssa_var (itype, NULL);
2122 def_stmt
2123 = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
2124 append_pattern_def_seq (stmt_vinfo, def_stmt);
2125
2126 t3 = vect_recog_temp_ssa_var (itype, NULL);
2127 def_stmt
2128 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2129 integer_one_node);
2130 append_pattern_def_seq (stmt_vinfo, def_stmt);
2131
2132 t4 = vect_recog_temp_ssa_var (itype, NULL);
2133 def_stmt
2134 = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
2135
2136 if (post_shift != 1)
2137 {
2138 append_pattern_def_seq (stmt_vinfo, def_stmt);
2139
2140 q = vect_recog_temp_ssa_var (itype, NULL);
2141 pattern_stmt
2142 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
2143 build_int_cst (itype,
2144 post_shift
2145 - 1));
2146 }
2147 else
2148 {
2149 q = t4;
2150 pattern_stmt = def_stmt;
2151 }
2152 }
2153 else
2154 {
2155 if (pre_shift >= prec || post_shift >= prec)
2156 return NULL;
2157
2158 /* t1 = oprnd0 >> pre_shift;
2159 t2 = t1 h* ml;
2160 q = t2 >> post_shift; */
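/* E.g. for d == 5 and prec == 32 (illustrative), pre_shift == 0,
   ml == 0xCCCCCCCD and post_shift == 2, so q = (oprnd0 h* ml) >> 2. */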
2161 if (pre_shift)
2162 {
2163 t1 = vect_recog_temp_ssa_var (itype, NULL);
2164 def_stmt
2165 = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
2166 build_int_cst (NULL,
2167 pre_shift));
2168 append_pattern_def_seq (stmt_vinfo, def_stmt);
2169 }
2170 else
2171 t1 = oprnd0;
2172
2173 t2 = vect_recog_temp_ssa_var (itype, NULL);
2174 def_stmt
2175 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
2176 build_int_cst (itype, ml));
2177
2178 if (post_shift)
2179 {
2180 append_pattern_def_seq (stmt_vinfo, def_stmt);
2181
2182 q = vect_recog_temp_ssa_var (itype, NULL);
2183 def_stmt
2184 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
2185 build_int_cst (itype,
2186 post_shift));
2187 }
2188 else
2189 q = t2;
2190
2191 pattern_stmt = def_stmt;
2192 }
2193 }
2194 else
2195 {
2196 unsigned HOST_WIDE_INT ml;
2197 int post_shift;
2198 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2199 unsigned HOST_WIDE_INT abs_d;
2200 bool add = false;
2201 tree t1, t2, t3, t4;
2202
2203 /* Give up for -1. */
2204 if (d == -1)
2205 return NULL;
2206
2207 /* Since d might be INT_MIN, we have to cast to
2208 unsigned HOST_WIDE_INT before negating to avoid
2209 undefined signed overflow. */
2210 abs_d = (d >= 0
2211 ? (unsigned HOST_WIDE_INT) d
2212 : - (unsigned HOST_WIDE_INT) d);
2213
2214 /* n rem d = n rem -d */
2215 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2216 {
2217 d = abs_d;
2218 oprnd1 = build_int_cst (itype, abs_d);
2219 }
2220 else if (HOST_BITS_PER_WIDE_INT >= prec
2221 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2222 /* This case is not handled correctly below. */
2223 return NULL;
2224
2225 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2226 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2227 {
2228 add = true;
2229 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2230 }
2231 if (post_shift >= prec)
2232 return NULL;
2233
2234 /* t1 = oprnd0 h* ml; */
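/* E.g. for d == 3 and prec == 32 (illustrative), ml == 0x55555556
   and post_shift == 0, so the final result computed below is
   q = t1 - (oprnd0 >> 31). */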
2235 t1 = vect_recog_temp_ssa_var (itype, NULL);
2236 def_stmt
2237 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2238 build_int_cst (itype, ml));
2239
2240 if (add)
2241 {
2242 /* t2 = t1 + oprnd0; */
2243 append_pattern_def_seq (stmt_vinfo, def_stmt);
2244 t2 = vect_recog_temp_ssa_var (itype, NULL);
2245 def_stmt
2246 = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
2247 }
2248 else
2249 t2 = t1;
2250
2251 if (post_shift)
2252 {
2253 /* t3 = t2 >> post_shift; */
2254 append_pattern_def_seq (stmt_vinfo, def_stmt);
2255 t3 = vect_recog_temp_ssa_var (itype, NULL);
2256 def_stmt
2257 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2258 build_int_cst (itype, post_shift));
2259 }
2260 else
2261 t3 = t2;
2262
2263 double_int oprnd0_min, oprnd0_max;
2264 int msb = 1;
2265 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2266 {
2267 if (!oprnd0_min.is_negative ())
2268 msb = 0;
2269 else if (oprnd0_max.is_negative ())
2270 msb = -1;
2271 }
2272
2273 if (msb == 0 && d >= 0)
2274 {
2275 /* q = t3; */
2276 q = t3;
2277 pattern_stmt = def_stmt;
2278 }
2279 else
2280 {
2281 /* t4 = oprnd0 >> (prec - 1);
2282 or if we know from VRP that oprnd0 >= 0
2283 t4 = 0;
2284 or if we know from VRP that oprnd0 < 0
2285 t4 = -1; */
2286 append_pattern_def_seq (stmt_vinfo, def_stmt);
2287 t4 = vect_recog_temp_ssa_var (itype, NULL);
2288 if (msb != 1)
2289 def_stmt
2290 = gimple_build_assign_with_ops (INTEGER_CST,
2291 t4, build_int_cst (itype, msb),
2292 NULL_TREE);
2293 else
2294 def_stmt
2295 = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
2296 build_int_cst (itype, prec - 1));
2297 append_pattern_def_seq (stmt_vinfo, def_stmt);
2298
2299 /* q = t3 - t4; or q = t4 - t3; */
2300 q = vect_recog_temp_ssa_var (itype, NULL);
2301 pattern_stmt
2302 = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2303 d < 0 ? t3 : t4);
2304 }
2305 }
2306
2307 if (rhs_code == TRUNC_MOD_EXPR)
2308 {
2309 tree r, t1;
2310
2311 /* We divided. Now finish by:
2312 t1 = q * oprnd1;
2313 r = oprnd0 - t1; */
2314 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2315
2316 t1 = vect_recog_temp_ssa_var (itype, NULL);
2317 def_stmt
2318 = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
2319 append_pattern_def_seq (stmt_vinfo, def_stmt);
2320
2321 r = vect_recog_temp_ssa_var (itype, NULL);
2322 pattern_stmt
2323 = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
2324 }
2325
2326 /* Pattern detected. */
2327 if (dump_enabled_p ())
2328 {
2329 dump_printf_loc (MSG_NOTE, vect_location,
2330 "vect_recog_divmod_pattern: detected: ");
2331 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2332 dump_printf (MSG_NOTE, "\n");
2333 }
2334
2335 stmts->safe_push (last_stmt);
2336
2337 *type_in = vectype;
2338 *type_out = vectype;
2339 return pattern_stmt;
2340 }
2341
2342 /* Function vect_recog_mixed_size_cond_pattern
2343
2344 Try to find the following pattern:
2345
2346 type x_t, y_t;
2347 TYPE a_T, b_T, c_T;
2348 loop:
2349 S1 a_T = x_t CMP y_t ? b_T : c_T;
2350
2351 where type 'TYPE' is an integral type which has a different size
2352 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2353 than 'type', the constants need to fit into an integer type
2354 with the same width as 'type') or results of conversion from 'type'.
2355
2356 Input:
2357
2358 * LAST_STMT: A stmt from which the pattern search begins.
2359
2360 Output:
2361
2362 * TYPE_IN: The type of the input arguments to the pattern.
2363
2364 * TYPE_OUT: The type of the output of this pattern.
2365
2366 * Return value: A new stmt that will be used to replace the pattern.
2367 Additionally a def_stmt is added.
2368
2369 a_it = x_t CMP y_t ? b_it : c_it;
2370 a_T = (TYPE) a_it; */
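
/* A minimal scalar example (illustrative only, not part of GCC):

     short x, y;
     int a;
     ...
     a = x < y ? 27 : 0;

   The comparison is done on shorts while the result is an int, so
   the pattern first builds a short-sized a_it = x < y ? 27 : 0 and
   then widens it with a_T = (int) a_it, keeping the COND_EXPR at
   the width of the comparison.  */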
2371
2372 static gimple
2373 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2374 tree *type_out)
2375 {
2376 gimple last_stmt = (*stmts)[0];
2377 tree cond_expr, then_clause, else_clause;
2378 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2379 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2380 enum machine_mode cmpmode;
2381 gimple pattern_stmt, def_stmt;
2382 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2383 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2384 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2385 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2386 bool promotion;
2387 tree comp_scalar_type;
2388
2389 if (!is_gimple_assign (last_stmt)
2390 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2391 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2392 return NULL;
2393
2394 cond_expr = gimple_assign_rhs1 (last_stmt);
2395 then_clause = gimple_assign_rhs2 (last_stmt);
2396 else_clause = gimple_assign_rhs3 (last_stmt);
2397
2398 if (!COMPARISON_CLASS_P (cond_expr))
2399 return NULL;
2400
2401 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2402 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2403 if (comp_vectype == NULL_TREE)
2404 return NULL;
2405
2406 type = gimple_expr_type (last_stmt);
2407 if (types_compatible_p (type, comp_scalar_type)
2408 || ((TREE_CODE (then_clause) != INTEGER_CST
2409 || TREE_CODE (else_clause) != INTEGER_CST)
2410 && !INTEGRAL_TYPE_P (comp_scalar_type))
2411 || !INTEGRAL_TYPE_P (type))
2412 return NULL;
2413
2414 if ((TREE_CODE (then_clause) != INTEGER_CST
2415 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2416 &def_stmt0, &promotion))
2417 || (TREE_CODE (else_clause) != INTEGER_CST
2418 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2419 &def_stmt1, &promotion)))
2420 return NULL;
2421
2422 if (orig_type0 && orig_type1
2423 && !types_compatible_p (orig_type0, orig_type1))
2424 return NULL;
2425
2426 if (orig_type0)
2427 {
2428 if (!types_compatible_p (orig_type0, comp_scalar_type))
2429 return NULL;
2430 then_clause = gimple_assign_rhs1 (def_stmt0);
2431 itype = orig_type0;
2432 }
2433
2434 if (orig_type1)
2435 {
2436 if (!types_compatible_p (orig_type1, comp_scalar_type))
2437 return NULL;
2438 else_clause = gimple_assign_rhs1 (def_stmt1);
2439 itype = orig_type1;
2440 }
2441
2442 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2443
2444 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2445 return NULL;
2446
2447 vectype = get_vectype_for_scalar_type (type);
2448 if (vectype == NULL_TREE)
2449 return NULL;
2450
2451 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2452 return NULL;
2453
2454 if (itype == NULL_TREE)
2455 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2456 TYPE_UNSIGNED (type));
2457
2458 if (itype == NULL_TREE
2459 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2460 return NULL;
2461
2462 vecitype = get_vectype_for_scalar_type (itype);
2463 if (vecitype == NULL_TREE)
2464 return NULL;
2465
2466 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2467 return NULL;
2468
2469 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2470 {
2471 if ((TREE_CODE (then_clause) == INTEGER_CST
2472 && !int_fits_type_p (then_clause, itype))
2473 || (TREE_CODE (else_clause) == INTEGER_CST
2474 && !int_fits_type_p (else_clause, itype)))
2475 return NULL;
2476 }
2477
2478 def_stmt
2479 = gimple_build_assign_with_ops (COND_EXPR,
2480 vect_recog_temp_ssa_var (itype, NULL),
2481 unshare_expr (cond_expr),
2482 fold_convert (itype, then_clause),
2483 fold_convert (itype, else_clause));
2484 pattern_stmt
2485 = gimple_build_assign_with_ops (NOP_EXPR,
2486 vect_recog_temp_ssa_var (type, NULL),
2487 gimple_assign_lhs (def_stmt), NULL_TREE);
2488
2489 new_pattern_def_seq (stmt_vinfo, def_stmt);
2490 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2491 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2492 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2493 *type_in = vecitype;
2494 *type_out = vectype;
2495
2496 if (dump_enabled_p ())
2497 dump_printf_loc (MSG_NOTE, vect_location,
2498 "vect_recog_mixed_size_cond_pattern: detected:\n");
2499
2500 return pattern_stmt;
2501 }
2502
2503
2504 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2505 true if bool VAR can be optimized that way. */
2506
2507 static bool
2508 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2509 {
2510 gimple def_stmt;
2511 enum vect_def_type dt;
2512 tree def, rhs1;
2513 enum tree_code rhs_code;
2514
2515 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2516 &dt))
2517 return false;
2518
2519 if (dt != vect_internal_def)
2520 return false;
2521
2522 if (!is_gimple_assign (def_stmt))
2523 return false;
2524
2525 if (!has_single_use (def))
2526 return false;
2527
2528 rhs1 = gimple_assign_rhs1 (def_stmt);
2529 rhs_code = gimple_assign_rhs_code (def_stmt);
2530 switch (rhs_code)
2531 {
2532 case SSA_NAME:
2533 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2534
2535 CASE_CONVERT:
2536 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2537 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2538 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2539 return false;
2540 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2541
2542 case BIT_NOT_EXPR:
2543 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2544
2545 case BIT_AND_EXPR:
2546 case BIT_IOR_EXPR:
2547 case BIT_XOR_EXPR:
2548 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2549 return false;
2550 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2551 bb_vinfo);
2552
2553 default:
2554 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2555 {
2556 tree vecitype, comp_vectype;
2557
2558 /* If the comparison can throw, then is_gimple_condexpr will be
2559 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2560 if (stmt_could_throw_p (def_stmt))
2561 return false;
2562
2563 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2564 if (comp_vectype == NULL_TREE)
2565 return false;
2566
2567 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2568 {
2569 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2570 tree itype
2571 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2572 vecitype = get_vectype_for_scalar_type (itype);
2573 if (vecitype == NULL_TREE)
2574 return false;
2575 }
2576 else
2577 vecitype = comp_vectype;
2578 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2579 }
2580 return false;
2581 }
2582 }
2583
2584
2585 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2586 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2587 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2588
2589 static tree
2590 adjust_bool_pattern_cast (tree type, tree var)
2591 {
2592 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2593 gimple cast_stmt, pattern_stmt;
2594
2595 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2596 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2597 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2598 cast_stmt
2599 = gimple_build_assign_with_ops (NOP_EXPR,
2600 vect_recog_temp_ssa_var (type, NULL),
2601 gimple_assign_lhs (pattern_stmt),
2602 NULL_TREE);
2603 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2604 return gimple_assign_lhs (cast_stmt);
2605 }
2606
2607
2608 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2609 recursively. VAR is an SSA_NAME that should be transformed from bool
2610 to a wider integer type, OUT_TYPE is the desired final integer type of
2611 the whole pattern. TRUEVAL should be NULL unless optimizing
2612 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2613 in the then_clause. STMTS is where statements with added pattern stmts
2614 should be pushed to. */
2615
2616 static tree
2617 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2618 vec<gimple> *stmts)
2619 {
2620 gimple stmt = SSA_NAME_DEF_STMT (var);
2621 enum tree_code rhs_code, def_rhs_code;
2622 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2623 location_t loc;
2624 gimple pattern_stmt, def_stmt;
2625
2626 rhs1 = gimple_assign_rhs1 (stmt);
2627 rhs2 = gimple_assign_rhs2 (stmt);
2628 rhs_code = gimple_assign_rhs_code (stmt);
2629 loc = gimple_location (stmt);
2630 switch (rhs_code)
2631 {
2632 case SSA_NAME:
2633 CASE_CONVERT:
2634 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2635 itype = TREE_TYPE (irhs1);
2636 pattern_stmt
2637 = gimple_build_assign_with_ops (SSA_NAME,
2638 vect_recog_temp_ssa_var (itype, NULL),
2639 irhs1, NULL_TREE);
2640 break;
2641
2642 case BIT_NOT_EXPR:
2643 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2644 itype = TREE_TYPE (irhs1);
2645 pattern_stmt
2646 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2647 vect_recog_temp_ssa_var (itype, NULL),
2648 irhs1, build_int_cst (itype, 1));
2649 break;
2650
2651 case BIT_AND_EXPR:
2652 /* Try to optimize x = y & (a < b ? 1 : 0); into
2653 x = (a < b ? y : 0);
2654
2655 E.g. for:
2656 bool a_b, b_b, c_b;
2657 TYPE d_T;
2658
2659 S1 a_b = x1 CMP1 y1;
2660 S2 b_b = x2 CMP2 y2;
2661 S3 c_b = a_b & b_b;
2662 S4 d_T = (TYPE) c_b;
2663
2664 we would normally emit:
2665
2666 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2667 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2668 S3' c_T = a_T & b_T;
2669 S4' d_T = c_T;
2670
2671 but we can save one stmt by using the
2672 result of one of the COND_EXPRs in the other COND_EXPR and leave
2673 BIT_AND_EXPR stmt out:
2674
2675 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2676 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2677 S4' d_T = c_T;
2678
2679 At least when VEC_COND_EXPR is implemented using masks,
2680 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
2681 computes the comparison mask and ANDs it, in one case with an
2682 all-ones vector, in the other case with a vector register.
2683 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
2684 often more expensive. */
2685 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2686 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2687 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2688 {
2689 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2690 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2691 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2692 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2693 {
2694 gimple tstmt;
2695 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2696 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2697 tstmt = stmts->pop ();
2698 gcc_assert (tstmt == def_stmt);
2699 stmts->quick_push (stmt);
2700 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2701 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2702 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2703 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2704 return irhs2;
2705 }
2706 else
2707 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2708 goto and_ior_xor;
2709 }
2710 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2711 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2712 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2713 {
2714 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2715 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2716 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2717 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2718 {
2719 gimple tstmt;
2720 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2721 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2722 tstmt = stmts->pop ();
2723 gcc_assert (tstmt == def_stmt);
2724 stmts->quick_push (stmt);
2725 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2726 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2727 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2728 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2729 return irhs1;
2730 }
2731 else
2732 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2733 goto and_ior_xor;
2734 }
2735 /* FALLTHRU */
2736 case BIT_IOR_EXPR:
2737 case BIT_XOR_EXPR:
2738 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2739 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2740 and_ior_xor:
2741 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2742 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2743 {
2744 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2745 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2746 int out_prec = TYPE_PRECISION (out_type);
2747 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2748 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2749 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2750 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2751 else
2752 {
2753 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2754 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2755 }
2756 }
2757 itype = TREE_TYPE (irhs1);
2758 pattern_stmt
2759 = gimple_build_assign_with_ops (rhs_code,
2760 vect_recog_temp_ssa_var (itype, NULL),
2761 irhs1, irhs2);
2762 break;
2763
2764 default:
2765 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2766 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2767 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2768 || (TYPE_PRECISION (TREE_TYPE (rhs1))
2769 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
2770 {
2771 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2772 itype
2773 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2774 }
2775 else
2776 itype = TREE_TYPE (rhs1);
2777 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2778 if (trueval == NULL_TREE)
2779 trueval = build_int_cst (itype, 1);
2780 else
2781 gcc_checking_assert (useless_type_conversion_p (itype,
2782 TREE_TYPE (trueval)));
2783 pattern_stmt
2784 = gimple_build_assign_with_ops (COND_EXPR,
2785 vect_recog_temp_ssa_var (itype, NULL),
2786 cond_expr, trueval,
2787 build_int_cst (itype, 0));
2788 break;
2789 }
2790
2791 stmts->safe_push (stmt);
2792 gimple_set_location (pattern_stmt, loc);
2793 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2794 return gimple_assign_lhs (pattern_stmt);
2795 }
2796
2797
2798 /* Function vect_recog_bool_pattern
2799
2800 Try to find a pattern like the following:
2801
2802 bool a_b, b_b, c_b, d_b, e_b;
2803 TYPE f_T;
2804 loop:
2805 S1 a_b = x1 CMP1 y1;
2806 S2 b_b = x2 CMP2 y2;
2807 S3 c_b = a_b & b_b;
2808 S4 d_b = x3 CMP3 y3;
2809 S5 e_b = c_b | d_b;
2810 S6 f_T = (TYPE) e_b;
2811
2812 where type 'TYPE' is an integral type.
2813
2814 Input:
2815
2816 * LAST_STMT: A stmt at the end from which the pattern
2817 search begins, i.e. cast of a bool to
2818 an integer type.
2819
2820 Output:
2821
2822 * TYPE_IN: The type of the input arguments to the pattern.
2823
2824 * TYPE_OUT: The type of the output of this pattern.
2825
2826 * Return value: A new stmt that will be used to replace the pattern.
2827
2828 Assuming the size of TYPE is the same as the size of all comparisons
2829 (otherwise some casts would be added where needed), for the above
2830 sequence we create the following related pattern stmts:
2831 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2832 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2833 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2834 S5' e_T = c_T | d_T;
2835 S6' f_T = e_T;
2836
2837 Instead of the above S3' we could emit:
2838 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2839 S3' c_T = a_T | b_T;
2840 but the above is more efficient. */
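
/* Scalar source that typically produces the GIMPLE above once the
   loop has been if-converted (illustrative only, not part of GCC):

     for (i = 0; i < n; i++)
       f[i] = (x1[i] < y1[i] && x2[i] < y2[i]) || x3[i] > y3[i];

   After if-conversion the && and || become BIT_AND_EXPR and
   BIT_IOR_EXPR on booleans, which is the S1..S6 shape this pattern
   rewrites into integer COND_EXPRs.  */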
2841
2842 static gimple
2843 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
2844 tree *type_out)
2845 {
2846 gimple last_stmt = stmts->pop ();
2847 enum tree_code rhs_code;
2848 tree var, lhs, rhs, vectype;
2849 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2850 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2851 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2852 gimple pattern_stmt;
2853
2854 if (!is_gimple_assign (last_stmt))
2855 return NULL;
2856
2857 var = gimple_assign_rhs1 (last_stmt);
2858 lhs = gimple_assign_lhs (last_stmt);
2859
2860 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2861 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2862 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2863 return NULL;
2864
2865 rhs_code = gimple_assign_rhs_code (last_stmt);
2866 if (CONVERT_EXPR_CODE_P (rhs_code))
2867 {
2868 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2869 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2870 return NULL;
2871 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2872 if (vectype == NULL_TREE)
2873 return NULL;
2874
2875 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2876 return NULL;
2877
2878 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2879 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2880 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2881 pattern_stmt
2882 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2883 else
2884 pattern_stmt
2885 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2886 *type_out = vectype;
2887 *type_in = vectype;
2888 stmts->safe_push (last_stmt);
2889 if (dump_enabled_p ())
2890 dump_printf_loc (MSG_NOTE, vect_location,
2891 "vect_recog_bool_pattern: detected:\n");
2892
2893 return pattern_stmt;
2894 }
2895 else if (rhs_code == SSA_NAME
2896 && STMT_VINFO_DATA_REF (stmt_vinfo))
2897 {
2898 stmt_vec_info pattern_stmt_info;
2899 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2900 gcc_assert (vectype != NULL_TREE);
2901 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2902 return NULL;
2903 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2904 return NULL;
2905
2906 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2907 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2908 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2909 {
2910 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2911 gimple cast_stmt
2912 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2913 new_pattern_def_seq (stmt_vinfo, cast_stmt);
2914 rhs = rhs2;
2915 }
2916 pattern_stmt
2917 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2918 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2919 bb_vinfo);
2920 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2921 STMT_VINFO_DATA_REF (pattern_stmt_info)
2922 = STMT_VINFO_DATA_REF (stmt_vinfo);
2923 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2924 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2925 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2926 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2927 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2928 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2929 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2930 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2931 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2932 *type_out = vectype;
2933 *type_in = vectype;
2934 stmts->safe_push (last_stmt);
2935 if (dump_enabled_p ())
2936 dump_printf_loc (MSG_NOTE, vect_location,
2937 "vect_recog_bool_pattern: detected:\n");
2938 return pattern_stmt;
2939 }
2940 else
2941 return NULL;
2942 }
2943
2944
2945 /* Mark statements that are involved in a pattern. */
2946
2947 static inline void
2948 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2949 tree pattern_vectype)
2950 {
2951 stmt_vec_info pattern_stmt_info, def_stmt_info;
2952 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2953 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2954 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
2955 gimple def_stmt;
2956
2957 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2958 if (pattern_stmt_info == NULL)
2959 {
2960 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2961 bb_vinfo);
2962 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2963 }
2964 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2965
2966 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2967 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2968 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2969 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2970 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2971 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2972 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2973 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2974 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2975 {
2976 gimple_stmt_iterator si;
2977 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2978 !gsi_end_p (si); gsi_next (&si))
2979 {
2980 def_stmt = gsi_stmt (si);
2981 def_stmt_info = vinfo_for_stmt (def_stmt);
2982 if (def_stmt_info == NULL)
2983 {
2984 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
2985 bb_vinfo);
2986 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2987 }
2988 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2989 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2990 STMT_VINFO_DEF_TYPE (def_stmt_info)
2991 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2992 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2993 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2994 }
2995 }
2996 }
2997
2998 /* Function vect_pattern_recog_1
2999
3000 Input:
3001 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3002 computation pattern.
3003 STMT: A stmt from which the pattern search should start.
3004
3005 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3006 expression that computes the same functionality and can be used to
3007 replace the sequence of stmts that are involved in the pattern.
3008
3009 Output:
3010 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3011 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3012 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3013 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3014 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it
3015 fits the available target pattern.
3016
3017 This function also does some bookkeeping, as explained in the documentation
3018 for vect_pattern_recog. */
3019
3020 static void
3021 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3022 gimple_stmt_iterator si,
3023 vec<gimple> *stmts_to_replace)
3024 {
3025 gimple stmt = gsi_stmt (si), pattern_stmt;
3026 stmt_vec_info stmt_info;
3027 loop_vec_info loop_vinfo;
3028 tree pattern_vectype;
3029 tree type_in, type_out;
3030 enum tree_code code;
3031 int i;
3032 gimple next;
3033
3034 stmts_to_replace->truncate (0);
3035 stmts_to_replace->quick_push (stmt);
3036 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
3037 if (!pattern_stmt)
3038 return;
3039
3040 stmt = stmts_to_replace->last ();
3041 stmt_info = vinfo_for_stmt (stmt);
3042 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3043
3044 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3045 {
3046 /* No need to check target support (already checked by the pattern
3047 recognition function). */
3048 pattern_vectype = type_out ? type_out : type_in;
3049 }
3050 else
3051 {
3052 enum machine_mode vec_mode;
3053 enum insn_code icode;
3054 optab optab;
3055
3056 /* Check target support */
3057 type_in = get_vectype_for_scalar_type (type_in);
3058 if (!type_in)
3059 return;
3060 if (type_out)
3061 type_out = get_vectype_for_scalar_type (type_out);
3062 else
3063 type_out = type_in;
3064 if (!type_out)
3065 return;
3066 pattern_vectype = type_out;
3067
3068 if (is_gimple_assign (pattern_stmt))
3069 code = gimple_assign_rhs_code (pattern_stmt);
3070 else
3071 {
3072 gcc_assert (is_gimple_call (pattern_stmt));
3073 code = CALL_EXPR;
3074 }
3075
3076 optab = optab_for_tree_code (code, type_in, optab_default);
3077 vec_mode = TYPE_MODE (type_in);
3078 if (!optab
3079 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3080 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3081 return;
3082 }
3083
3084 /* Found a vectorizable pattern. */
3085 if (dump_enabled_p ())
3086 {
3087 dump_printf_loc (MSG_NOTE, vect_location,
3088 "pattern recognized: ");
3089 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3090 dump_printf (MSG_NOTE, "\n");
3091 }
3092
3093 /* Mark the stmts that are involved in the pattern. */
3094 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3095
3096 /* Patterns cannot be vectorized using SLP, because they change the order of
3097 computation. */
3098 if (loop_vinfo)
3099 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3100 if (next == stmt)
3101 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3102
3103 /* It is possible that additional pattern stmts are created and inserted in
3104 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3105 relevant statements. */
3106 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3107 && (unsigned) i < (stmts_to_replace->length () - 1);
3108 i++)
3109 {
3110 stmt_info = vinfo_for_stmt (stmt);
3111 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3112 if (dump_enabled_p ())
3113 {
3114 dump_printf_loc (MSG_NOTE, vect_location,
3115 "additional pattern stmt: ");
3116 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3117 dump_printf (MSG_NOTE, "\n");
3118 }
3119
3120 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3121 }
3122 }
3123
3124
3125 /* Function vect_pattern_recog
3126
3127 Input:
3128 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
3129 computation idioms.
3130
3131 Output - for each computation idiom that is detected we create a new stmt
3132 that provides the same functionality and that can be vectorized. We
3133 also record some information in the stmt_vec_info of the relevant
3134 stmts, as explained below:
3135
3136 At the entry to this function we have the following stmts, with the
3137 following initial value in the STMT_VINFO fields:
3138
3139 stmt in_pattern_p related_stmt vec_stmt
3140 S1: a_i = .... - - -
3141 S2: a_2 = ..use(a_i).. - - -
3142 S3: a_1 = ..use(a_2).. - - -
3143 S4: a_0 = ..use(a_1).. - - -
3144 S5: ... = ..use(a_0).. - - -
3145
3146 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3147 represented by a single stmt. We then:
3148 - create a new stmt S6 equivalent to the pattern (the stmt is not
3149 inserted into the code)
3150 - fill in the STMT_VINFO fields as follows:
3151
3152 in_pattern_p related_stmt vec_stmt
3153 S1: a_i = .... - - -
3154 S2: a_2 = ..use(a_i).. - - -
3155 S3: a_1 = ..use(a_2).. - - -
3156 S4: a_0 = ..use(a_1).. true S6 -
3157 '---> S6: a_new = .... - S4 -
3158 S5: ... = ..use(a_0).. - - -
3159
3160 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3161 to each other through the RELATED_STMT field).
3162
3163 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3164 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3165 remain irrelevant unless used by stmts other than S4.
3166
3167 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3168 (because they are marked as irrelevant). It will vectorize S6, and record
3169 a pointer to the new vector stmt VS6 from S6 (as usual).
3170 S4 will be skipped, and S5 will be vectorized as usual:
3171
3172 in_pattern_p related_stmt vec_stmt
3173 S1: a_i = .... - - -
3174 S2: a_2 = ..use(a_i).. - - -
3175 S3: a_1 = ..use(a_2).. - - -
3176 > VS6: va_new = .... - - -
3177 S4: a_0 = ..use(a_1).. true S6 VS6
3178 '---> S6: a_new = .... - S4 VS6
3179 > VS5: ... = ..vuse(va_new).. - - -
3180 S5: ... = ..use(a_0).. - - -
3181
3182 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3183 elsewhere), and we'll end up with:
3184
3185 VS6: va_new = ....
3186 VS5: ... = ..vuse(va_new)..
3187
3188 In case of more than one pattern statements, e.g., widen-mult with
3189 intermediate type:
3190
3191 S1 a_t = ;
3192 S2 a_T = (TYPE) a_t;
3193 '--> S3: a_it = (interm_type) a_t;
3194 S4 prod_T = a_T * CONST;
3195 '--> S5: prod_T' = a_it w* CONST;
3196
3197 there may be other users of a_T outside the pattern. In that case S2 will
3198 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3199 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3200 be recorded in S3. */
3201
3202 void
3203 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3204 {
3205 struct loop *loop;
3206 basic_block *bbs;
3207 unsigned int nbbs;
3208 gimple_stmt_iterator si;
3209 unsigned int i, j;
3210 vect_recog_func_ptr vect_recog_func;
3211 stack_vec<gimple, 1> stmts_to_replace;
3212 gimple stmt;
3213
3214 if (dump_enabled_p ())
3215 dump_printf_loc (MSG_NOTE, vect_location,
3216 "=== vect_pattern_recog ===\n");
3217
3218 if (loop_vinfo)
3219 {
3220 loop = LOOP_VINFO_LOOP (loop_vinfo);
3221 bbs = LOOP_VINFO_BBS (loop_vinfo);
3222 nbbs = loop->num_nodes;
3223 }
3224 else
3225 {
3226 bbs = &BB_VINFO_BB (bb_vinfo);
3227 nbbs = 1;
3228 }
3229
3230 /* Scan through the loop stmts, applying the pattern recognition
3231 functions starting at each stmt visited: */
3232 for (i = 0; i < nbbs; i++)
3233 {
3234 basic_block bb = bbs[i];
3235 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3236 {
3237 if (bb_vinfo && (stmt = gsi_stmt (si))
3238 && vinfo_for_stmt (stmt)
3239 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3240 continue;
3241
3242 /* Scan over all generic vect_recog_xxx_pattern functions. */
3243 for (j = 0; j < NUM_PATTERNS; j++)
3244 {
3245 vect_recog_func = vect_vect_recog_func_ptrs[j];
3246 vect_pattern_recog_1 (vect_recog_func, si,
3247 &stmts_to_replace);
3248 }
3249 }
3250 }
3251 }