/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2014 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "target.h"
#include "predict.h"
#include "vec.h"
#include "hashtab.h"
#include "hash-set.h"
#include "machmode.h"
#include "hard-reg-set.h"
#include "input.h"
#include "function.h"
#include "dominance.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"  /* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "dumpfile.h"
#include "builtins.h"

/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
                                            tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
                                             tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
                                           tree *);
static gimple vect_recog_sad_pattern (vec<gimple> *, tree *,
                                      tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
                                                tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
                                              tree *, tree *);
static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
                                                      tree *, tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *,
                                         tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
                                                  tree *, tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
  vect_recog_widen_mult_pattern,
  vect_recog_widen_sum_pattern,
  vect_recog_dot_prod_pattern,
  vect_recog_sad_pattern,
  vect_recog_pow_pattern,
  vect_recog_widen_shift_pattern,
  vect_recog_over_widening_pattern,
  vect_recog_rotate_pattern,
  vect_recog_vector_vector_shift_pattern,
  vect_recog_divmod_pattern,
  vect_recog_mixed_size_cond_pattern,
  vect_recog_bool_pattern};

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
                                      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}

/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
        return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
          || gimple_code (stmt2) == GIMPLE_PHI)
        return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}

/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}

/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
                   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
                           &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
                           bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

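/* For illustration, a minimal C source loop that exhibits the pattern
   above (a sketch for exposition; the array names are invented and do
   not appear in GCC itself):

     short a[N], b[N];
     int i, sum = 0;
     for (i = 0; i < N; i++)
       sum += a[i] * b[i];

   Here 'type' is short, 'TYPE1' is int (exactly double the size), and
   the accumulator 'sum' plays the role of 'TYPE2'.  */
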
static gimple
vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
                             tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
          || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
                             &promotion)
          && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr.  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It cannot be the dot-product pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
        = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
                              &promotion)
          || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
                              &promotion)
          || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
                                               oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}


/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */

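/* For illustration, a minimal C source loop that this recognizer matches
   (a sketch for exposition; the names are invented):

     unsigned char x[N], y[N];
     int i, sum = 0;
     for (i = 0; i < N; i++)
       sum += abs (x[i] - y[i]);

   Here 'type' is unsigned char, the signed 'TYPE1' holding the
   difference is int (via the usual arithmetic conversions), and 'sum'
   acts as the 'TYPE2' accumulator.  */
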
static gimple
vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in,
                        tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DAD = (TYPE2) DAD;   #optional
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
          || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
        return NULL;

      /* The type conversion could be promotion, demotion,
         or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
                             &half_type, &def_stmt, &promotion))
        plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
        half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It cannot be the SAD pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It cannot be the SAD pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
                          &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
                          &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple pattern_stmt = gimple_build_assign_with_ops
    (SAD_EXPR, var, sad_oprnd0, sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}


/* Handle widening operation by a constant.  At the moment we support
   MULT_EXPR and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */

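/* A worked example (illustrative only): with HALF_TYPE = unsigned char
   and TYPE = unsigned int, a widening multiplication by the constant 300
   cannot be done char-by-char, because 300 does not fit in unsigned char.
   TYPE is 4 times bigger than HALF_TYPE, though, and 300 does fit the
   intermediate unsigned short, so we promote a_t to unsigned short and
   perform the widening multiplication from there.  */
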
static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
                               tree const_oprnd, tree *oprnd,
                               vec<gimple> *stmts, tree type,
                               tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;
  gimple new_stmt;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
           && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
              != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      stmts->safe_push (def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      new_oprnd = make_ssa_name (new_type, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
                                               NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      stmts->safe_push (def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}


/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of types 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
     '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.
*/

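/* For illustration, a C source loop this recognizer matches (a sketch
   for exposition; the names are invented):

     short a[N], b[N];
     int c[N], i;
     for (i = 0; i < N; i++)
       c[i] = a[i] * b[i];

   C's usual arithmetic conversions promote a[i] and b[i] to int before
   the multiplication, producing exactly the S3/S4/S5 shape above with
   'type1' = 'type2' = short and 'TYPE' = int.  */
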
static gimple
vect_recog_widen_mult_pattern (vec<gimple> *stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple new_stmt = NULL, pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
                              &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
                                            &oprnd0, stmts, type,
                                            &half_type0, def_stmt0))
        {
          half_type1 = half_type0;
          oprnd1 = fold_convert (half_type1, oprnd1);
        }
      else
        return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      tree *oprnd = NULL;
      gimple def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
        {
          def_stmt = def_stmt0;
          half_type0 = half_type1;
          oprnd = &oprnd0;
        }
      else
        {
          def_stmt = def_stmt1;
          half_type1 = half_type0;
          oprnd = &oprnd1;
        }

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                               old_oprnd, NULL_TREE);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
          || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice as large as HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
              (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
               TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
                                          vecitype, vectype,
                                          &dummy_code, &dummy_code,
                                          &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
                                               oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
        = new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
        = new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt
        = gimple_build_assign_with_ops (NOP_EXPR,
                                        vect_recog_temp_ssa_var (type, NULL),
                                        gimple_assign_lhs (pattern_stmt),
                                        NULL_TREE);
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}


/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)
*/

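/* For illustration (a sketch): a loop body containing

     d[i] = pow (s[i], 2.0);

   is rewritten as the plain multiplication s[i] * s[i], and an exponent
   of 0.5 becomes a call to sqrt, which the target may provide as a
   vectorizable builtin; either form is far cheaper to vectorize than a
   general pow.  */
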
static gimple
vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
                        tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
          && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
        {
          gimple stmt = gimple_build_call (newfn, 1, base);
          if (vectorizable_function (stmt, *type_in, *type_in)
              != NULL_TREE)
            {
              var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
              gimple_call_set_lhs (stmt, var);
              return stmt;
            }
        }
    }

  return NULL;
}


/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

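/* For illustration, a minimal C source loop that exhibits the pattern
   (a sketch; the names are invented):

     unsigned char x[N];
     unsigned int i, sum = 0;
     for (i = 0; i < N; i++)
       sum += x[i];

   Here 'type' is unsigned char and the 'TYPE' accumulator is
   unsigned int.  */
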
static gimple
vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
                              tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
                          &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
                                               oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}


/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be
         created in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
         in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */

static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  vec<gimple> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
                              &promotion)
          || !promotion
          || !vect_same_loop_or_bb_p (stmt, def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
        {
          /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
          if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
            return false;

          interm_type = build_nonstandard_integer_type (
                          TYPE_PRECISION (half_type) * 2,
                          TYPE_UNSIGNED (type));
          if (!int_fits_type_p (const_oprnd, interm_type))
            return false;
        }

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
        return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
         (e.g., if the original value was char, the shift amount is at most 8
         if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
        return false;

      interm_type = build_nonstandard_integer_type (
                      TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
        return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
        break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
        return false;

      interm_type = build_nonstandard_integer_type (
                      TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
        return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We
           store this statement in NEW_DEF_STMT, and it is later put in
           STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

              stmts->safe_push (def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              new_oprnd = make_ssa_name (interm_type, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              stmts->safe_push (def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          new_oprnd = make_ssa_name (interm_type, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}


/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', and C0 and C1
   are constants.  Check if S3 and S4 can be done on a smaller type than
   'TYPE'; that type can either be 'type' or some intermediate type.  For now,
   we expect S5 to be a type demotion operation.  We also check that S3 and S4
   have only one use.  */

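/* For illustration (a sketch; the names are invented): with 8-bit data

     unsigned char in[N], out[N];
     for (i = 0; i < N; i++)
       out[i] = (in[i] << 1) | 1;

   C promotes in[i] to int, so S3 and S4 are performed in 32 bits even
   though the final result is demoted back to 8 bits; this recognizer
   narrows the shift and the bitwise OR to a smaller type so that more
   elements fit in each vector.  */
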
static gimple
vect_recog_over_widening_pattern (vec<gimple> *stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = stmts->pop ();
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In that
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
        = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
                                        op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "created pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
          || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
        return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          new_oprnd = make_ssa_name (use_type, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            stmts->safe_push (prev_stmt);
        }
      else
        {
          if (prev_stmt)
            STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
              = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}

/* Detect widening shift pattern:

     type a_t;
     TYPE a_T, res_T;

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
     S3  res_T = a_T << CONST;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect cases where the shift result is immediately converted
   to another type 'result_type' that is no larger in size than 'TYPE'.
   In those cases we perform a widen-shift that directly results in
   'result_type', to avoid a possible over-widening situation:

     type a_t;
     TYPE a_T, res_T;
     result_type res_result;

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
     S3  res_T = a_T << CONST;
     S4  res_result = (result_type) res_T;
     '--> res_result' = a_t w<< CONST;

   And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
   create an additional pattern stmt for S2 to create a variable of an
   intermediate type, and perform widen-shift on the intermediate type:

     type a_t;
     interm_type a_it;
     TYPE a_T, res_T, res_T';

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
     '--> a_it = (interm_type) a_t;
     S3  res_T = a_T << CONST;
     '--> res_T' = a_it <<* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.
   In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
   in STMTS.  When an intermediate type is used and a pattern statement is
   created for S2, we also put S2 here (before S3).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_LSHIFT_EXPR <a_t, CONST>.  */

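/* For illustration, a loop this recognizer matches (a sketch; the
   names are invented):

     unsigned char a[N];
     unsigned short r[N];
     for (i = 0; i < N; i++)
       r[i] = a[i] << 4;

   C promotes a[i] to int before the shift and the result is truncated
   back to unsigned short, matching S2/S3/S4 with a 'result_type' no
   larger than 'TYPE'.  */
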
static gimple
vect_recog_widen_shift_pattern (vec<gimple> *stmts,
                                tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: it has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
          && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
        {
          last_stmt = use_stmt;
          type = use_type;
        }
    }

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
                                      &oprnd0, stmts,
                                      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy_code, &dummy_code,
                                          &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

1794 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
1795
1796 type a_t, b_t, c_t;
1797
1798 S0 a_t = b_t r<< c_t;
1799
1800 Input/Output:
1801
1802 * STMTS: Contains a stmt from which the pattern search begins,
1803 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1804 with a sequence:
1805
1806 S1 d_t = -c_t;
1807 S2 e_t = d_t & (B - 1);
1808 S3 f_t = b_t << c_t;
1809 S4 g_t = b_t >> e_t;
1810 S0 a_t = f_t | g_t;
1811
1812 where B is the element bitsize of type.
1813
1814 Output:
1815
1816 * TYPE_IN: The type of the input arguments to the pattern.
1817
1818 * TYPE_OUT: The type of the output of this pattern.
1819
1820 * Return value: A new stmt that will be used to replace the rotate
1821 S0 stmt. */
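
/* A minimal C sketch of the rotate idiom and its replacement
   (illustrative only), for a 32-bit unsigned element type, B == 32:

     unsigned int a, b, c;
     a = (b << c) | (b >> ((-c) & 31));

   When the target has vector shifts but no vector rotate, S0 above is
   expanded to

     d = -c;
     e = d & 31;
     f = b << c;
     g = b >> e;
     a = f | g;

   Masking the negated count with B - 1 keeps the right-shift amount in
   range even when c is zero, where b >> (B - c) would be undefined. */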
1822
1823 static gimple
1824 vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
1825 {
1826 gimple last_stmt = stmts->pop ();
1827 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1828 gimple pattern_stmt, def_stmt;
1829 enum tree_code rhs_code;
1830 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1831 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1832 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1833 enum vect_def_type dt;
1834 optab optab1, optab2;
1835 edge ext_def = NULL;
1836
1837 if (!is_gimple_assign (last_stmt))
1838 return NULL;
1839
1840 rhs_code = gimple_assign_rhs_code (last_stmt);
1841 switch (rhs_code)
1842 {
1843 case LROTATE_EXPR:
1844 case RROTATE_EXPR:
1845 break;
1846 default:
1847 return NULL;
1848 }
1849
1850 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1851 return NULL;
1852
1853 lhs = gimple_assign_lhs (last_stmt);
1854 oprnd0 = gimple_assign_rhs1 (last_stmt);
1855 type = TREE_TYPE (oprnd0);
1856 oprnd1 = gimple_assign_rhs2 (last_stmt);
1857 if (TREE_CODE (oprnd0) != SSA_NAME
1858 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1859 || !INTEGRAL_TYPE_P (type)
1860 || !TYPE_UNSIGNED (type))
1861 return NULL;
1862
1863 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1864 &def, &dt))
1865 return NULL;
1866
1867 if (dt != vect_internal_def
1868 && dt != vect_constant_def
1869 && dt != vect_external_def)
1870 return NULL;
1871
1872 vectype = get_vectype_for_scalar_type (type);
1873 if (vectype == NULL_TREE)
1874 return NULL;
1875
1876 /* If vector/vector or vector/scalar rotate is supported by the target,
1877 don't do anything here. */
1878 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1879 if (optab1
1880 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1881 return NULL;
1882
1883 if (bb_vinfo != NULL || dt != vect_internal_def)
1884 {
1885 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1886 if (optab2
1887 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1888 return NULL;
1889 }
1890
1891 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1892 don't do anything here either. */
1893 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1894 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1895 if (!optab1
1896 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1897 || !optab2
1898 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1899 {
1900 if (bb_vinfo == NULL && dt == vect_internal_def)
1901 return NULL;
1902 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1903 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1904 if (!optab1
1905 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1906 || !optab2
1907 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1908 return NULL;
1909 }
1910
1911 *type_in = vectype;
1912 *type_out = vectype;
1913 if (*type_in == NULL_TREE)
1914 return NULL;
1915
1916 if (dt == vect_external_def
1917 && TREE_CODE (oprnd1) == SSA_NAME
1918 && loop_vinfo)
1919 {
1920 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1921 ext_def = loop_preheader_edge (loop);
1922 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1923 {
1924 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1925 if (bb == NULL
1926 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1927 ext_def = NULL;
1928 }
1929 }
1930
1931 def = NULL_TREE;
1932 if (TREE_CODE (oprnd1) == INTEGER_CST
1933 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1934 def = oprnd1;
1935 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1936 {
1937 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1938 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1939 && TYPE_PRECISION (TREE_TYPE (rhs1))
1940 == TYPE_PRECISION (type))
1941 def = rhs1;
1942 }
1943
1944 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1945 if (def == NULL_TREE)
1946 {
1947 def = vect_recog_temp_ssa_var (type, NULL);
1948 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1949 NULL_TREE);
1950 if (ext_def)
1951 {
1952 basic_block new_bb
1953 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1954 gcc_assert (!new_bb);
1955 }
1956 else
1957 append_pattern_def_seq (stmt_vinfo, def_stmt);
1958 }
1959 stype = TREE_TYPE (def);
1960
1961 if (TREE_CODE (def) == INTEGER_CST)
1962 {
1963 if (!tree_fits_uhwi_p (def)
1964 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
1965 || integer_zerop (def))
1966 return NULL;
1967 def2 = build_int_cst (stype,
1968 GET_MODE_PRECISION (TYPE_MODE (type))
1969 - tree_to_uhwi (def));
1970 }
1971 else
1972 {
1973 tree vecstype = get_vectype_for_scalar_type (stype);
1974 stmt_vec_info def_stmt_vinfo;
1975
1976 if (vecstype == NULL_TREE)
1977 return NULL;
1978 def2 = vect_recog_temp_ssa_var (stype, NULL);
1979 def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
1980 NULL_TREE);
1981 if (ext_def)
1982 {
1983 basic_block new_bb
1984 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1985 gcc_assert (!new_bb);
1986 }
1987 else
1988 {
1989 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1990 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1991 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1992 append_pattern_def_seq (stmt_vinfo, def_stmt);
1993 }
1994
1995 def2 = vect_recog_temp_ssa_var (stype, NULL);
1996 tree mask
1997 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
1998 def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
1999 gimple_assign_lhs (def_stmt),
2000 mask);
2001 if (ext_def)
2002 {
2003 basic_block new_bb
2004 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2005 gcc_assert (!new_bb);
2006 }
2007 else
2008 {
2009 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2010 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2011 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2012 append_pattern_def_seq (stmt_vinfo, def_stmt);
2013 }
2014 }
2015
2016 var1 = vect_recog_temp_ssa_var (type, NULL);
2017 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
2018 ? LSHIFT_EXPR : RSHIFT_EXPR,
2019 var1, oprnd0, def);
2020 append_pattern_def_seq (stmt_vinfo, def_stmt);
2021
2022 var2 = vect_recog_temp_ssa_var (type, NULL);
2023 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
2024 ? RSHIFT_EXPR : LSHIFT_EXPR,
2025 var2, oprnd0, def2);
2026 append_pattern_def_seq (stmt_vinfo, def_stmt);
2027
2028 /* Pattern detected. */
2029 if (dump_enabled_p ())
2030 dump_printf_loc (MSG_NOTE, vect_location,
2031 "vect_recog_rotate_pattern: detected:\n");
2032
2033 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2034 var = vect_recog_temp_ssa_var (type, NULL);
2035 pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);
2036
2037 if (dump_enabled_p ())
2038 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2039
2040 stmts->safe_push (last_stmt);
2041 return pattern_stmt;
2042 }
2043
2044 /* Detect a vector-by-vector shift pattern that wouldn't otherwise be
2045 vectorized:
2046
2047 type a_t;
2048 TYPE b_T, res_T;
2049
2050 S1 a_t = ;
2051 S2 b_T = ;
2052 S3 res_T = b_T op a_t;
2053
2054 where type 'TYPE' is a type with a different size from 'type',
2055 and op is <<, >> or rotate.
2056
2057 Also detect cases:
2058
2059 type a_t;
2060 TYPE b_T, c_T, res_T;
2061
2062 S0 c_T = ;
2063 S1 a_t = (type) c_T;
2064 S2 b_T = ;
2065 S3 res_T = b_T op a_t;
2066
2067 Input/Output:
2068
2069 * STMTS: Contains a stmt from which the pattern search begins,
2070 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2071 with a shift/rotate which has the same type on both operands; in the
2072 second case just b_T op c_T, in the first case with an added cast
2073 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2074
2075 Output:
2076
2077 * TYPE_IN: The type of the input arguments to the pattern.
2078
2079 * TYPE_OUT: The type of the output of this pattern.
2080
2081 * Return value: A new stmt that will be used to replace the shift/rotate
2082 S3 stmt. */
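
/* A minimal C sketch of the idiom (illustrative only):

     unsigned int b[256], res[256];
     unsigned char s[256];
     for (int i = 0; i < 256; i++)
       res[i] = b[i] << s[i];

   The shift counts are narrower than the shifted values, so a cast of
   s[i] to unsigned int is added to the pattern def seq (or the source
   of an existing cast is reused) and S3 becomes a shift with the same
   type on both operands. */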
2083
2084 static gimple
2085 vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
2086 tree *type_in, tree *type_out)
2087 {
2088 gimple last_stmt = stmts->pop ();
2089 tree oprnd0, oprnd1, lhs, var;
2090 gimple pattern_stmt, def_stmt;
2091 enum tree_code rhs_code;
2092 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2093 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2094 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2095 enum vect_def_type dt;
2096 tree def;
2097
2098 if (!is_gimple_assign (last_stmt))
2099 return NULL;
2100
2101 rhs_code = gimple_assign_rhs_code (last_stmt);
2102 switch (rhs_code)
2103 {
2104 case LSHIFT_EXPR:
2105 case RSHIFT_EXPR:
2106 case LROTATE_EXPR:
2107 case RROTATE_EXPR:
2108 break;
2109 default:
2110 return NULL;
2111 }
2112
2113 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2114 return NULL;
2115
2116 lhs = gimple_assign_lhs (last_stmt);
2117 oprnd0 = gimple_assign_rhs1 (last_stmt);
2118 oprnd1 = gimple_assign_rhs2 (last_stmt);
2119 if (TREE_CODE (oprnd0) != SSA_NAME
2120 || TREE_CODE (oprnd1) != SSA_NAME
2121 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2122 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2123 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2124 || TYPE_PRECISION (TREE_TYPE (lhs))
2125 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2126 return NULL;
2127
2128 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
2129 &def, &dt))
2130 return NULL;
2131
2132 if (dt != vect_internal_def)
2133 return NULL;
2134
2135 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2136 *type_out = *type_in;
2137 if (*type_in == NULL_TREE)
2138 return NULL;
2139
2140 def = NULL_TREE;
2141 if (gimple_assign_cast_p (def_stmt))
2142 {
2143 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2144 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2145 && TYPE_PRECISION (TREE_TYPE (rhs1))
2146 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2147 def = rhs1;
2148 }
2149
2150 if (def == NULL_TREE)
2151 {
2152 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2153 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
2154 NULL_TREE);
2155 new_pattern_def_seq (stmt_vinfo, def_stmt);
2156 }
2157
2158 /* Pattern detected. */
2159 if (dump_enabled_p ())
2160 dump_printf_loc (MSG_NOTE, vect_location,
2161 "vect_recog_vector_vector_shift_pattern: detected:\n");
2162
2163 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2164 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2165 pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
2166
2167 if (dump_enabled_p ())
2168 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2169
2170 stmts->safe_push (last_stmt);
2171 return pattern_stmt;
2172 }
2173
2174 /* Detect a signed division by a constant that wouldn't be
2175 otherwise vectorized:
2176
2177 type a_t, b_t;
2178
2179 S1 a_t = b_t / N;
2180
2181 where type 'type' is an integral type and N is a constant.
2182
2183 Similarly handle modulo by a constant:
2184
2185 S4 a_t = b_t % N;
2186
2187 Input/Output:
2188
2189 * STMTS: Contains a stmt from which the pattern search begins,
2190 i.e. the division stmt. S1 is replaced, if N is a power of two
2191 constant and the type is signed, by:
2192 S3 y_t = b_t < 0 ? N - 1 : 0;
2193 S2 x_t = b_t + y_t;
2194 S1' a_t = x_t >> log2 (N);
2195
2196 S4 is replaced, if N is a power of two constant and the type
2197 is signed, by the following (where the *_T temporaries have unsigned type):
2198 S9 y_T = b_t < 0 ? -1U : 0U;
2199 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2200 S7 z_t = (type) z_T;
2201 S6 w_t = b_t + z_t;
2202 S5 x_t = w_t & (N - 1);
2203 S4' a_t = x_t - z_t;
2204
2205 Output:
2206
2207 * TYPE_IN: The type of the input arguments to the pattern.
2208
2209 * TYPE_OUT: The type of the output of this pattern.
2210
2211 * Return value: A new stmt that will be used to replace the division
2212 S1 or modulo S4 stmt. */
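
/* A worked sketch of the power-of-two case (illustrative only), for
   N == 8 and a signed 32-bit type:

     a = b / 8;

   becomes

     y = b < 0 ? 7 : 0;
     x = b + y;
     a = x >> 3;

   biasing negative dividends by N - 1 before the arithmetic shift so
   that the result rounds towards zero, as C division requires. */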
2213
2214 static gimple
2215 vect_recog_divmod_pattern (vec<gimple> *stmts,
2216 tree *type_in, tree *type_out)
2217 {
2218 gimple last_stmt = stmts->pop ();
2219 tree oprnd0, oprnd1, vectype, itype, cond;
2220 gimple pattern_stmt, def_stmt;
2221 enum tree_code rhs_code;
2222 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2223 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2224 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2225 optab optab;
2226 tree q;
2227 int dummy_int, prec;
2228 stmt_vec_info def_stmt_vinfo;
2229
2230 if (!is_gimple_assign (last_stmt))
2231 return NULL;
2232
2233 rhs_code = gimple_assign_rhs_code (last_stmt);
2234 switch (rhs_code)
2235 {
2236 case TRUNC_DIV_EXPR:
2237 case TRUNC_MOD_EXPR:
2238 break;
2239 default:
2240 return NULL;
2241 }
2242
2243 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2244 return NULL;
2245
2246 oprnd0 = gimple_assign_rhs1 (last_stmt);
2247 oprnd1 = gimple_assign_rhs2 (last_stmt);
2248 itype = TREE_TYPE (oprnd0);
2249 if (TREE_CODE (oprnd0) != SSA_NAME
2250 || TREE_CODE (oprnd1) != INTEGER_CST
2251 || TREE_CODE (itype) != INTEGER_TYPE
2252 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2253 return NULL;
2254
2255 vectype = get_vectype_for_scalar_type (itype);
2256 if (vectype == NULL_TREE)
2257 return NULL;
2258
2259 /* If the target can handle vectorized division or modulo natively,
2260 don't attempt to optimize this. */
2261 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2262 if (optab != unknown_optab)
2263 {
2264 enum machine_mode vec_mode = TYPE_MODE (vectype);
2265 int icode = (int) optab_handler (optab, vec_mode);
2266 if (icode != CODE_FOR_nothing)
2267 return NULL;
2268 }
2269
2270 prec = TYPE_PRECISION (itype);
2271 if (integer_pow2p (oprnd1))
2272 {
2273 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2274 return NULL;
2275
2276 /* Pattern detected. */
2277 if (dump_enabled_p ())
2278 dump_printf_loc (MSG_NOTE, vect_location,
2279 "vect_recog_divmod_pattern: detected:\n");
2280
2281 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2282 build_int_cst (itype, 0));
2283 if (rhs_code == TRUNC_DIV_EXPR)
2284 {
2285 tree var = vect_recog_temp_ssa_var (itype, NULL);
2286 tree shift;
2287 def_stmt
2288 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2289 fold_build2 (MINUS_EXPR, itype,
2290 oprnd1,
2291 build_int_cst (itype,
2292 1)),
2293 build_int_cst (itype, 0));
2294 new_pattern_def_seq (stmt_vinfo, def_stmt);
2295 var = vect_recog_temp_ssa_var (itype, NULL);
2296 def_stmt
2297 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
2298 gimple_assign_lhs (def_stmt));
2299 append_pattern_def_seq (stmt_vinfo, def_stmt);
2300
2301 shift = build_int_cst (itype, tree_log2 (oprnd1));
2302 pattern_stmt
2303 = gimple_build_assign_with_ops (RSHIFT_EXPR,
2304 vect_recog_temp_ssa_var (itype,
2305 NULL),
2306 var, shift);
2307 }
2308 else
2309 {
2310 tree signmask;
2311 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2312 if (compare_tree_int (oprnd1, 2) == 0)
2313 {
2314 signmask = vect_recog_temp_ssa_var (itype, NULL);
2315 def_stmt
2316 = gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
2317 build_int_cst (itype, 1),
2318 build_int_cst (itype, 0));
2319 append_pattern_def_seq (stmt_vinfo, def_stmt);
2320 }
2321 else
2322 {
2323 tree utype
2324 = build_nonstandard_integer_type (prec, 1);
2325 tree vecutype = get_vectype_for_scalar_type (utype);
2326 tree shift
2327 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2328 - tree_log2 (oprnd1));
2329 tree var = vect_recog_temp_ssa_var (utype, NULL);
2330
2331 def_stmt
2332 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2333 build_int_cst (utype, -1),
2334 build_int_cst (utype, 0));
2335 def_stmt_vinfo
2336 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2337 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2338 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2339 append_pattern_def_seq (stmt_vinfo, def_stmt);
2340 var = vect_recog_temp_ssa_var (utype, NULL);
2341 def_stmt
2342 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
2343 gimple_assign_lhs (def_stmt),
2344 shift);
2345 def_stmt_vinfo
2346 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2347 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2348 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2349 append_pattern_def_seq (stmt_vinfo, def_stmt);
2350 signmask = vect_recog_temp_ssa_var (itype, NULL);
2351 def_stmt
2352 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
2353 NULL_TREE);
2354 append_pattern_def_seq (stmt_vinfo, def_stmt);
2355 }
2356 def_stmt
2357 = gimple_build_assign_with_ops (PLUS_EXPR,
2358 vect_recog_temp_ssa_var (itype,
2359 NULL),
2360 oprnd0, signmask);
2361 append_pattern_def_seq (stmt_vinfo, def_stmt);
2362 def_stmt
2363 = gimple_build_assign_with_ops (BIT_AND_EXPR,
2364 vect_recog_temp_ssa_var (itype,
2365 NULL),
2366 gimple_assign_lhs (def_stmt),
2367 fold_build2 (MINUS_EXPR, itype,
2368 oprnd1,
2369 build_int_cst (itype,
2370 1)));
2371 append_pattern_def_seq (stmt_vinfo, def_stmt);
2372
2373 pattern_stmt
2374 = gimple_build_assign_with_ops (MINUS_EXPR,
2375 vect_recog_temp_ssa_var (itype,
2376 NULL),
2377 gimple_assign_lhs (def_stmt),
2378 signmask);
2379 }
2380
2381 if (dump_enabled_p ())
2382 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2383 0);
2384
2385 stmts->safe_push (last_stmt);
2386
2387 *type_in = vectype;
2388 *type_out = vectype;
2389 return pattern_stmt;
2390 }
2391
2392 if (prec > HOST_BITS_PER_WIDE_INT
2393 || integer_zerop (oprnd1))
2394 return NULL;
2395
2396 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2397 return NULL;
2398
2399 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2400
2401 if (TYPE_UNSIGNED (itype))
2402 {
2403 unsigned HOST_WIDE_INT mh, ml;
2404 int pre_shift, post_shift;
2405 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2406 & GET_MODE_MASK (TYPE_MODE (itype)));
2407 tree t1, t2, t3, t4;
2408
2409 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2410 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2411 return NULL;
2412
2413 /* Find a suitable multiplier and right shift count
2414 instead of multiplying with D. */
2415 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2416
2417 /* If the suggested multiplier is more than PREC bits, we can do better
2418 for even divisors, using an initial right shift. */
2419 if (mh != 0 && (d & 1) == 0)
2420 {
2421 pre_shift = floor_log2 (d & -d);
2422 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2423 &ml, &post_shift, &dummy_int);
2424 gcc_assert (!mh);
2425 }
2426 else
2427 pre_shift = 0;
2428
2429 if (mh != 0)
2430 {
2431 if (post_shift - 1 >= prec)
2432 return NULL;
2433
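
/* For instance (a worked example, stated here for illustration):
   unsigned 32-bit division by 7 needs a 33-bit multiplier, so mh is
   nonzero, ml = 0x24924925 and post_shift = 3, giving

     t1 = n h* 0x24924925;
     t2 = n - t1;
     t3 = t2 >> 1;
     t4 = t1 + t3;
     q  = t4 >> 2;

   e.g. n = 7 gives t1 = 1, t2 = 6, t3 = 3, t4 = 4, q = 1. */
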
2434 /* t1 = oprnd0 h* ml;
2435 t2 = oprnd0 - t1;
2436 t3 = t2 >> 1;
2437 t4 = t1 + t3;
2438 q = t4 >> (post_shift - 1); */
2439 t1 = vect_recog_temp_ssa_var (itype, NULL);
2440 def_stmt
2441 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2442 build_int_cst (itype, ml));
2443 append_pattern_def_seq (stmt_vinfo, def_stmt);
2444
2445 t2 = vect_recog_temp_ssa_var (itype, NULL);
2446 def_stmt
2447 = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
2448 append_pattern_def_seq (stmt_vinfo, def_stmt);
2449
2450 t3 = vect_recog_temp_ssa_var (itype, NULL);
2451 def_stmt
2452 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2453 integer_one_node);
2454 append_pattern_def_seq (stmt_vinfo, def_stmt);
2455
2456 t4 = vect_recog_temp_ssa_var (itype, NULL);
2457 def_stmt
2458 = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
2459
2460 if (post_shift != 1)
2461 {
2462 append_pattern_def_seq (stmt_vinfo, def_stmt);
2463
2464 q = vect_recog_temp_ssa_var (itype, NULL);
2465 pattern_stmt
2466 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
2467 build_int_cst (itype,
2468 post_shift
2469 - 1));
2470 }
2471 else
2472 {
2473 q = t4;
2474 pattern_stmt = def_stmt;
2475 }
2476 }
2477 else
2478 {
2479 if (pre_shift >= prec || post_shift >= prec)
2480 return NULL;
2481
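
/* For instance (a worked example, stated here for illustration):
   unsigned 32-bit division by 10 uses pre_shift = 1 (10 = 2 * 5) and
   then divides by 5 with ml = 0xCCCCCCCD and post_shift = 2:

     t1 = n >> 1;
     t2 = t1 h* 0xCCCCCCCD;
     q  = t2 >> 2;

   e.g. n = 20 gives t1 = 10, t2 = 8, q = 2. */
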
2482 /* t1 = oprnd0 >> pre_shift;
2483 t2 = t1 h* ml;
2484 q = t2 >> post_shift; */
2485 if (pre_shift)
2486 {
2487 t1 = vect_recog_temp_ssa_var (itype, NULL);
2488 def_stmt
2489 = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
2490 build_int_cst (NULL,
2491 pre_shift));
2492 append_pattern_def_seq (stmt_vinfo, def_stmt);
2493 }
2494 else
2495 t1 = oprnd0;
2496
2497 t2 = vect_recog_temp_ssa_var (itype, NULL);
2498 def_stmt
2499 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
2500 build_int_cst (itype, ml));
2501
2502 if (post_shift)
2503 {
2504 append_pattern_def_seq (stmt_vinfo, def_stmt);
2505
2506 q = vect_recog_temp_ssa_var (itype, NULL);
2507 def_stmt
2508 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
2509 build_int_cst (itype,
2510 post_shift));
2511 }
2512 else
2513 q = t2;
2514
2515 pattern_stmt = def_stmt;
2516 }
2517 }
2518 else
2519 {
2520 unsigned HOST_WIDE_INT ml;
2521 int post_shift;
2522 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2523 unsigned HOST_WIDE_INT abs_d;
2524 bool add = false;
2525 tree t1, t2, t3, t4;
2526
2527 /* Give up for -1. */
2528 if (d == -1)
2529 return NULL;
2530
2531 /* Since d might be INT_MIN, we have to cast to
2532 unsigned HOST_WIDE_INT before negating to avoid
2533 undefined signed overflow. */
2534 abs_d = (d >= 0
2535 ? (unsigned HOST_WIDE_INT) d
2536 : - (unsigned HOST_WIDE_INT) d);
2537
2538 /* n rem d = n rem -d */
2539 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2540 {
2541 d = abs_d;
2542 oprnd1 = build_int_cst (itype, abs_d);
2543 }
2544 else if (HOST_BITS_PER_WIDE_INT >= prec
2545 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2546 /* This case is not handled correctly below. */
2547 return NULL;
2548
2549 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2550 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2551 {
2552 add = true;
2553 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2554 }
2555 if (post_shift >= prec)
2556 return NULL;
2557
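
/* For instance (a worked example, stated here for illustration):
   signed 32-bit division by 7 gets ml = 0x92492493; that constant has
   the sign bit set, so ADD becomes true and post_shift = 2, giving,
   when the sign of oprnd0 is unknown,

     t1 = n h* 0x92492493;
     t2 = t1 + n;
     t3 = t2 >> 2;
     t4 = n >> 31;
     q  = t3 - t4;

   e.g. n = -7 gives t1 = 2, t2 = -5, t3 = -2, t4 = -1, q = -1. */
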
2558 /* t1 = oprnd0 h* ml; */
2559 t1 = vect_recog_temp_ssa_var (itype, NULL);
2560 def_stmt
2561 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2562 build_int_cst (itype, ml));
2563
2564 if (add)
2565 {
2566 /* t2 = t1 + oprnd0; */
2567 append_pattern_def_seq (stmt_vinfo, def_stmt);
2568 t2 = vect_recog_temp_ssa_var (itype, NULL);
2569 def_stmt
2570 = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
2571 }
2572 else
2573 t2 = t1;
2574
2575 if (post_shift)
2576 {
2577 /* t3 = t2 >> post_shift; */
2578 append_pattern_def_seq (stmt_vinfo, def_stmt);
2579 t3 = vect_recog_temp_ssa_var (itype, NULL);
2580 def_stmt
2581 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2582 build_int_cst (itype, post_shift));
2583 }
2584 else
2585 t3 = t2;
2586
2587 wide_int oprnd0_min, oprnd0_max;
2588 int msb = 1;
2589 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2590 {
2591 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2592 msb = 0;
2593 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2594 msb = -1;
2595 }
2596
2597 if (msb == 0 && d >= 0)
2598 {
2599 /* q = t3; */
2600 q = t3;
2601 pattern_stmt = def_stmt;
2602 }
2603 else
2604 {
2605 /* t4 = oprnd0 >> (prec - 1);
2606 or if we know from VRP that oprnd0 >= 0
2607 t4 = 0;
2608 or if we know from VRP that oprnd0 < 0
2609 t4 = -1; */
2610 append_pattern_def_seq (stmt_vinfo, def_stmt);
2611 t4 = vect_recog_temp_ssa_var (itype, NULL);
2612 if (msb != 1)
2613 def_stmt
2614 = gimple_build_assign_with_ops (INTEGER_CST,
2615 t4, build_int_cst (itype, msb),
2616 NULL_TREE);
2617 else
2618 def_stmt
2619 = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
2620 build_int_cst (itype, prec - 1));
2621 append_pattern_def_seq (stmt_vinfo, def_stmt);
2622
2623 /* q = t3 - t4; or q = t4 - t3; */
2624 q = vect_recog_temp_ssa_var (itype, NULL);
2625 pattern_stmt
2626 = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2627 d < 0 ? t3 : t4);
2628 }
2629 }
2630
2631 if (rhs_code == TRUNC_MOD_EXPR)
2632 {
2633 tree r, t1;
2634
2635 /* We divided. Now finish by:
2636 t1 = q * oprnd1;
2637 r = oprnd0 - t1; */
2638 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2639
2640 t1 = vect_recog_temp_ssa_var (itype, NULL);
2641 def_stmt
2642 = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
2643 append_pattern_def_seq (stmt_vinfo, def_stmt);
2644
2645 r = vect_recog_temp_ssa_var (itype, NULL);
2646 pattern_stmt
2647 = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
2648 }
2649
2650 /* Pattern detected. */
2651 if (dump_enabled_p ())
2652 {
2653 dump_printf_loc (MSG_NOTE, vect_location,
2654 "vect_recog_divmod_pattern: detected: ");
2655 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2656 dump_printf (MSG_NOTE, "\n");
2657 }
2658
2659 stmts->safe_push (last_stmt);
2660
2661 *type_in = vectype;
2662 *type_out = vectype;
2663 return pattern_stmt;
2664 }
2665
2666 /* Function vect_recog_mixed_size_cond_pattern
2667
2668 Try to find the following pattern:
2669
2670 type x_t, y_t;
2671 TYPE a_T, b_T, c_T;
2672 loop:
2673 S1 a_T = x_t CMP y_t ? b_T : c_T;
2674
2675 where type 'TYPE' is an integral type which has a different size
2676 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2677 than 'type', the constants need to fit into an integer type
2678 with the same width as 'type') or results of conversion from 'type'.
2679
2680 Input:
2681
2682 * LAST_STMT: A stmt from which the pattern search begins.
2683
2684 Output:
2685
2686 * TYPE_IN: The type of the input arguments to the pattern.
2687
2688 * TYPE_OUT: The type of the output of this pattern.
2689
2690 * Return value: A new stmt that will be used to replace the pattern.
2691 Additionally a def_stmt is added.
2692
2693 a_it = x_t CMP y_t ? b_it : c_it;
2694 a_T = (TYPE) a_it; */
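
/* A minimal C sketch of the idiom (illustrative only):

     char x[256], y[256];
     int a[256];
     for (int i = 0; i < 256; i++)
       a[i] = x[i] < y[i] ? 3 : 5;

   The comparison is done on chars while the result is an int, so the
   COND_EXPR is first evaluated in a char-sized integer type and its
   result is then widened, as in the a_it/a_T pair above. */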
2695
2696 static gimple
2697 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2698 tree *type_out)
2699 {
2700 gimple last_stmt = (*stmts)[0];
2701 tree cond_expr, then_clause, else_clause;
2702 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2703 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2704 enum machine_mode cmpmode;
2705 gimple pattern_stmt, def_stmt;
2706 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2707 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2708 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2709 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2710 bool promotion;
2711 tree comp_scalar_type;
2712
2713 if (!is_gimple_assign (last_stmt)
2714 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2715 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2716 return NULL;
2717
2718 cond_expr = gimple_assign_rhs1 (last_stmt);
2719 then_clause = gimple_assign_rhs2 (last_stmt);
2720 else_clause = gimple_assign_rhs3 (last_stmt);
2721
2722 if (!COMPARISON_CLASS_P (cond_expr))
2723 return NULL;
2724
2725 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2726 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2727 if (comp_vectype == NULL_TREE)
2728 return NULL;
2729
2730 type = gimple_expr_type (last_stmt);
2731 if (types_compatible_p (type, comp_scalar_type)
2732 || ((TREE_CODE (then_clause) != INTEGER_CST
2733 || TREE_CODE (else_clause) != INTEGER_CST)
2734 && !INTEGRAL_TYPE_P (comp_scalar_type))
2735 || !INTEGRAL_TYPE_P (type))
2736 return NULL;
2737
2738 if ((TREE_CODE (then_clause) != INTEGER_CST
2739 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2740 &def_stmt0, &promotion))
2741 || (TREE_CODE (else_clause) != INTEGER_CST
2742 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2743 &def_stmt1, &promotion)))
2744 return NULL;
2745
2746 if (orig_type0 && orig_type1
2747 && !types_compatible_p (orig_type0, orig_type1))
2748 return NULL;
2749
2750 if (orig_type0)
2751 {
2752 if (!types_compatible_p (orig_type0, comp_scalar_type))
2753 return NULL;
2754 then_clause = gimple_assign_rhs1 (def_stmt0);
2755 itype = orig_type0;
2756 }
2757
2758 if (orig_type1)
2759 {
2760 if (!types_compatible_p (orig_type1, comp_scalar_type))
2761 return NULL;
2762 else_clause = gimple_assign_rhs1 (def_stmt1);
2763 itype = orig_type1;
2764 }
2765
2766 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2767
2768 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2769 return NULL;
2770
2771 vectype = get_vectype_for_scalar_type (type);
2772 if (vectype == NULL_TREE)
2773 return NULL;
2774
2775 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2776 return NULL;
2777
2778 if (itype == NULL_TREE)
2779 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2780 TYPE_UNSIGNED (type));
2781
2782 if (itype == NULL_TREE
2783 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2784 return NULL;
2785
2786 vecitype = get_vectype_for_scalar_type (itype);
2787 if (vecitype == NULL_TREE)
2788 return NULL;
2789
2790 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2791 return NULL;
2792
2793 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2794 {
2795 if ((TREE_CODE (then_clause) == INTEGER_CST
2796 && !int_fits_type_p (then_clause, itype))
2797 || (TREE_CODE (else_clause) == INTEGER_CST
2798 && !int_fits_type_p (else_clause, itype)))
2799 return NULL;
2800 }
2801
2802 def_stmt
2803 = gimple_build_assign_with_ops (COND_EXPR,
2804 vect_recog_temp_ssa_var (itype, NULL),
2805 unshare_expr (cond_expr),
2806 fold_convert (itype, then_clause),
2807 fold_convert (itype, else_clause));
2808 pattern_stmt
2809 = gimple_build_assign_with_ops (NOP_EXPR,
2810 vect_recog_temp_ssa_var (type, NULL),
2811 gimple_assign_lhs (def_stmt), NULL_TREE);
2812
2813 new_pattern_def_seq (stmt_vinfo, def_stmt);
2814 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2815 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2816 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2817 *type_in = vecitype;
2818 *type_out = vectype;
2819
2820 if (dump_enabled_p ())
2821 dump_printf_loc (MSG_NOTE, vect_location,
2822 "vect_recog_mixed_size_cond_pattern: detected:\n");
2823
2824 return pattern_stmt;
2825 }
2826
2827
2828 /* Helper function of vect_recog_bool_pattern. Called recursively; returns
2829 true if bool VAR can be optimized that way. */
2830
2831 static bool
2832 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2833 {
2834 gimple def_stmt;
2835 enum vect_def_type dt;
2836 tree def, rhs1;
2837 enum tree_code rhs_code;
2838
2839 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2840 &dt))
2841 return false;
2842
2843 if (dt != vect_internal_def)
2844 return false;
2845
2846 if (!is_gimple_assign (def_stmt))
2847 return false;
2848
2849 if (!has_single_use (def))
2850 return false;
2851
2852 rhs1 = gimple_assign_rhs1 (def_stmt);
2853 rhs_code = gimple_assign_rhs_code (def_stmt);
2854 switch (rhs_code)
2855 {
2856 case SSA_NAME:
2857 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2858
2859 CASE_CONVERT:
2860 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2861 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2862 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2863 return false;
2864 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2865
2866 case BIT_NOT_EXPR:
2867 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2868
2869 case BIT_AND_EXPR:
2870 case BIT_IOR_EXPR:
2871 case BIT_XOR_EXPR:
2872 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2873 return false;
2874 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2875 bb_vinfo);
2876
2877 default:
2878 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2879 {
2880 tree vecitype, comp_vectype;
2881
2882 /* If the comparison can throw, then is_gimple_condexpr will be
2883 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2884 if (stmt_could_throw_p (def_stmt))
2885 return false;
2886
2887 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2888 if (comp_vectype == NULL_TREE)
2889 return false;
2890
2891 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2892 {
2893 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2894 tree itype
2895 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2896 vecitype = get_vectype_for_scalar_type (itype);
2897 if (vecitype == NULL_TREE)
2898 return false;
2899 }
2900 else
2901 vecitype = comp_vectype;
2902 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2903 }
2904 return false;
2905 }
2906 }
2907
2908
2909 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2910 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2911 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2912
2913 static tree
2914 adjust_bool_pattern_cast (tree type, tree var)
2915 {
2916 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2917 gimple cast_stmt, pattern_stmt;
2918
2919 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2920 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2921 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2922 cast_stmt
2923 = gimple_build_assign_with_ops (NOP_EXPR,
2924 vect_recog_temp_ssa_var (type, NULL),
2925 gimple_assign_lhs (pattern_stmt),
2926 NULL_TREE);
2927 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2928 return gimple_assign_lhs (cast_stmt);
2929 }
2930
2931
2932 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2933 recursively. VAR is an SSA_NAME that should be transformed from bool
2934 to a wider integer type, OUT_TYPE is the desired final integer type of
2935 the whole pattern, TRUEVAL should be NULL unless optimizing
2936 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2937 in the then_clause, STMTS is where statements with added pattern stmts
2938 should be pushed to. */
2939
2940 static tree
2941 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2942 vec<gimple> *stmts)
2943 {
2944 gimple stmt = SSA_NAME_DEF_STMT (var);
2945 enum tree_code rhs_code, def_rhs_code;
2946 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2947 location_t loc;
2948 gimple pattern_stmt, def_stmt;
2949
2950 rhs1 = gimple_assign_rhs1 (stmt);
2951 rhs2 = gimple_assign_rhs2 (stmt);
2952 rhs_code = gimple_assign_rhs_code (stmt);
2953 loc = gimple_location (stmt);
2954 switch (rhs_code)
2955 {
2956 case SSA_NAME:
2957 CASE_CONVERT:
2958 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2959 itype = TREE_TYPE (irhs1);
2960 pattern_stmt
2961 = gimple_build_assign_with_ops (SSA_NAME,
2962 vect_recog_temp_ssa_var (itype, NULL),
2963 irhs1, NULL_TREE);
2964 break;
2965
2966 case BIT_NOT_EXPR:
2967 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2968 itype = TREE_TYPE (irhs1);
2969 pattern_stmt
2970 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2971 vect_recog_temp_ssa_var (itype, NULL),
2972 irhs1, build_int_cst (itype, 1));
2973 break;
2974
2975 case BIT_AND_EXPR:
2976 /* Try to optimize x = y & (a < b ? 1 : 0); into
2977 x = (a < b ? y : 0);
2978
2979 E.g. for:
2980 bool a_b, b_b, c_b;
2981 TYPE d_T;
2982
2983 S1 a_b = x1 CMP1 y1;
2984 S2 b_b = x2 CMP2 y2;
2985 S3 c_b = a_b & b_b;
2986 S4 d_T = (TYPE) c_b;
2987
2988 we would normally emit:
2989
2990 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2991 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2992 S3' c_T = a_T & b_T;
2993 S4' d_T = c_T;
2994
2995 but we can save one stmt by using the
2996 result of one of the COND_EXPRs in the other COND_EXPR and leave
2997 BIT_AND_EXPR stmt out:
2998
2999 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3000 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3001 S4' f_T = c_T;
3002
3003 At least when VEC_COND_EXPR is implemented using masks,
3004 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3005 computes the comparison mask and ANDs it, in one case with an
3006 all-ones vector, in the other case with a vector register.
3007 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
3008 often more expensive. */
3009 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3010 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3011 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3012 {
3013 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3014 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3015 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3016 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3017 {
3018 gimple tstmt;
3019 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3020 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
3021 tstmt = stmts->pop ();
3022 gcc_assert (tstmt == def_stmt);
3023 stmts->quick_push (stmt);
3024 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3025 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3026 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3027 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3028 return irhs2;
3029 }
3030 else
3031 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3032 goto and_ior_xor;
3033 }
3034 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3035 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3036 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3037 {
3038 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3039 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3040 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3041 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3042 {
3043 gimple tstmt;
3044 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3045 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
3046 tstmt = stmts->pop ();
3047 gcc_assert (tstmt == def_stmt);
3048 stmts->quick_push (stmt);
3049 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3050 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3051 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3052 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3053 return irhs1;
3054 }
3055 else
3056 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3057 goto and_ior_xor;
3058 }
3059 /* FALLTHRU */
3060 case BIT_IOR_EXPR:
3061 case BIT_XOR_EXPR:
3062 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3063 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3064 and_ior_xor:
3065 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3066 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3067 {
3068 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3069 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3070 int out_prec = TYPE_PRECISION (out_type);
3071 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3072 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3073 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3074 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3075 else
3076 {
3077 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3078 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3079 }
3080 }
3081 itype = TREE_TYPE (irhs1);
3082 pattern_stmt
3083 = gimple_build_assign_with_ops (rhs_code,
3084 vect_recog_temp_ssa_var (itype, NULL),
3085 irhs1, irhs2);
3086 break;
3087
3088 default:
3089 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3090 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3091 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3092 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3093 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3094 {
3095 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3096 itype
3097 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3098 }
3099 else
3100 itype = TREE_TYPE (rhs1);
3101 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3102 if (trueval == NULL_TREE)
3103 trueval = build_int_cst (itype, 1);
3104 else
3105 gcc_checking_assert (useless_type_conversion_p (itype,
3106 TREE_TYPE (trueval)));
3107 pattern_stmt
3108 = gimple_build_assign_with_ops (COND_EXPR,
3109 vect_recog_temp_ssa_var (itype, NULL),
3110 cond_expr, trueval,
3111 build_int_cst (itype, 0));
3112 break;
3113 }
3114
3115 stmts->safe_push (stmt);
3116 gimple_set_location (pattern_stmt, loc);
3117 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3118 return gimple_assign_lhs (pattern_stmt);
3119 }
3120
3121
3122 /* Function vect_recog_bool_pattern
3123
3124 Try to find a pattern like the following:
3125
3126 bool a_b, b_b, c_b, d_b, e_b;
3127 TYPE f_T;
3128 loop:
3129 S1 a_b = x1 CMP1 y1;
3130 S2 b_b = x2 CMP2 y2;
3131 S3 c_b = a_b & b_b;
3132 S4 d_b = x3 CMP3 y3;
3133 S5 e_b = c_b | d_b;
3134 S6 f_T = (TYPE) e_b;
3135
3136 where type 'TYPE' is an integral type. Or a similar pattern
3137 ending in
3138
3139 S6 f_Y = e_b ? r_Y : s_Y;
3140
3141 such as results from if-conversion of a complex condition.
3142
3143 Input:
3144
3145 * LAST_STMT: A stmt at the end from which the pattern
3146 search begins, i.e. cast of a bool to
3147 an integer type.
3148
3149 Output:
3150
3151 * TYPE_IN: The type of the input arguments to the pattern.
3152
3153 * TYPE_OUT: The type of the output of this pattern.
3154
3155 * Return value: A new stmt that will be used to replace the pattern.
3156
3157 Assuming the size of TYPE is the same as the size of all comparisons
3158 (otherwise some casts would be added where needed), for the above
3159 sequence we create the following related pattern stmts:
3160 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3161 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3162 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3163 S5' e_T = c_T | d_T;
3164 S6' f_T = e_T;
3165
3166 Instead of the above S3' we could emit:
3167 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3168 S3' c_T = a_T | b_T;
3169 but the above is more efficient. */
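
/* A minimal C sketch of the idiom (illustrative only):

     int x1[256], y1[256], x2[256], y2[256], f[256];
     for (int i = 0; i < 256; i++)
       {
         _Bool a = x1[i] < y1[i];
         _Bool b = x2[i] == y2[i];
         _Bool c = a & b;
         f[i] = (int) c;
       }

   Each bool becomes an integer 0/1 mask computed by a COND_EXPR, as in
   S1'..S6' above, so the chain can be evaluated with vector compares
   and bitwise operations. */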
3170
3171 static gimple
3172 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
3173 tree *type_out)
3174 {
3175 gimple last_stmt = stmts->pop ();
3176 enum tree_code rhs_code;
3177 tree var, lhs, rhs, vectype;
3178 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3179 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3180 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
3181 gimple pattern_stmt;
3182
3183 if (!is_gimple_assign (last_stmt))
3184 return NULL;
3185
3186 var = gimple_assign_rhs1 (last_stmt);
3187 lhs = gimple_assign_lhs (last_stmt);
3188
3189 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3190 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3191 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3192 return NULL;
3193
3194 rhs_code = gimple_assign_rhs_code (last_stmt);
3195 if (CONVERT_EXPR_CODE_P (rhs_code))
3196 {
3197 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3198 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3199 return NULL;
3200 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3201 if (vectype == NULL_TREE)
3202 return NULL;
3203
3204 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3205 return NULL;
3206
3207 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3208 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3209 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3210 pattern_stmt
3211 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
3212 else
3213 pattern_stmt
3214 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
3215 *type_out = vectype;
3216 *type_in = vectype;
3217 stmts->safe_push (last_stmt);
3218 if (dump_enabled_p ())
3219 dump_printf_loc (MSG_NOTE, vect_location,
3220 "vect_recog_bool_pattern: detected:\n");
3221
3222 return pattern_stmt;
3223 }
3224 else if (rhs_code == COND_EXPR
3225 && TREE_CODE (var) == SSA_NAME)
3226 {
3227 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3228 if (vectype == NULL_TREE)
3229 return NULL;
3230
3231 /* Build a scalar type for the boolean result that when
3232 vectorized matches the vector type of the result in
3233 size and number of elements. */
3234 unsigned prec
3235 = wi::udiv_trunc (TYPE_SIZE (vectype),
3236 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3237 tree type
3238 = build_nonstandard_integer_type (prec,
3239 TYPE_UNSIGNED (TREE_TYPE (var)));
3240 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3241 return NULL;
3242
3243 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3244 return NULL;
3245
3246 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
3247 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3248 pattern_stmt
3249 = gimple_build_assign_with_ops (COND_EXPR, lhs,
3250 build2 (NE_EXPR, boolean_type_node,
3251 rhs, build_int_cst (type, 0)),
3252 gimple_assign_rhs2 (last_stmt),
3253 gimple_assign_rhs3 (last_stmt));
3254 *type_out = vectype;
3255 *type_in = vectype;
3256 stmts->safe_push (last_stmt);
3257 if (dump_enabled_p ())
3258 dump_printf_loc (MSG_NOTE, vect_location,
3259 "vect_recog_bool_pattern: detected:\n");
3260
3261 return pattern_stmt;
3262 }
3263 else if (rhs_code == SSA_NAME
3264 && STMT_VINFO_DATA_REF (stmt_vinfo))
3265 {
3266 stmt_vec_info pattern_stmt_info;
3267 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3268 gcc_assert (vectype != NULL_TREE);
3269 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3270 return NULL;
3271 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3272 return NULL;
3273
3274 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
3275 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3276 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3277 {
3278 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3279 gimple cast_stmt
3280 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
3281 new_pattern_def_seq (stmt_vinfo, cast_stmt);
3282 rhs = rhs2;
3283 }
3284 pattern_stmt
3285 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
3286 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3287 bb_vinfo);
3288 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3289 STMT_VINFO_DATA_REF (pattern_stmt_info)
3290 = STMT_VINFO_DATA_REF (stmt_vinfo);
3291 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3292 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3293 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3294 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3295 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3296 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3297 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3298 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3299 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3300 *type_out = vectype;
3301 *type_in = vectype;
3302 stmts->safe_push (last_stmt);
3303 if (dump_enabled_p ())
3304 dump_printf_loc (MSG_NOTE, vect_location,
3305 "vect_recog_bool_pattern: detected:\n");
3306 return pattern_stmt;
3307 }
3308 else
3309 return NULL;
3310 }
3311
3312
3313 /* Mark statements that are involved in a pattern. */
3314
3315 static inline void
3316 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
3317 tree pattern_vectype)
3318 {
3319 stmt_vec_info pattern_stmt_info, def_stmt_info;
3320 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
3321 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
3322 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
3323 gimple def_stmt;
3324
3325 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
3326 if (pattern_stmt_info == NULL)
3327 {
3328 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3329 bb_vinfo);
3330 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3331 }
3332 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
3333
3334 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3335 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
3336 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
3337 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3338 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3339 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
3340 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3341 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3342 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
3343 {
3344 gimple_stmt_iterator si;
3345 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3346 !gsi_end_p (si); gsi_next (&si))
3347 {
3348 def_stmt = gsi_stmt (si);
3349 def_stmt_info = vinfo_for_stmt (def_stmt);
3350 if (def_stmt_info == NULL)
3351 {
3352 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
3353 bb_vinfo);
3354 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3355 }
3356 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3357 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
3358 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
3359 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3360 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
3361 }
3362 }
3363 }
3364
3365 /* Function vect_pattern_recog_1
3366
3367 Input:
3368 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3369 computation pattern.
3370 STMT: A stmt from which the pattern search should start.
3371
3372 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3373 expression that computes the same functionality and can be used to
3374 replace the sequence of stmts that are involved in the pattern.
3375
3376 Output:
3377 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3378 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3379 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3380 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3381 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3382 the available target pattern.
3383
3384 This function also does some bookkeeping, as explained in the documentation
3385 for vect_pattern_recog. */
3386
3387 static void
3388 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3389 gimple_stmt_iterator si,
3390 vec<gimple> *stmts_to_replace)
3391 {
3392 gimple stmt = gsi_stmt (si), pattern_stmt;
3393 stmt_vec_info stmt_info;
3394 loop_vec_info loop_vinfo;
3395 tree pattern_vectype;
3396 tree type_in, type_out;
3397 enum tree_code code;
3398 int i;
3399 gimple next;
3400
3401 stmts_to_replace->truncate (0);
3402 stmts_to_replace->quick_push (stmt);
3403 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
3404 if (!pattern_stmt)
3405 return;
3406
3407 stmt = stmts_to_replace->last ();
3408 stmt_info = vinfo_for_stmt (stmt);
3409 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3410
3411 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3412 {
3413 /* No need to check target support (already checked by the pattern
3414 recognition function). */
3415 pattern_vectype = type_out ? type_out : type_in;
3416 }
3417 else
3418 {
3419 enum machine_mode vec_mode;
3420 enum insn_code icode;
3421 optab optab;
3422
3423 /* Check target support */
3424 type_in = get_vectype_for_scalar_type (type_in);
3425 if (!type_in)
3426 return;
3427 if (type_out)
3428 type_out = get_vectype_for_scalar_type (type_out);
3429 else
3430 type_out = type_in;
3431 if (!type_out)
3432 return;
3433 pattern_vectype = type_out;
3434
3435 if (is_gimple_assign (pattern_stmt))
3436 code = gimple_assign_rhs_code (pattern_stmt);
3437 else
3438 {
3439 gcc_assert (is_gimple_call (pattern_stmt));
3440 code = CALL_EXPR;
3441 }
3442
3443 optab = optab_for_tree_code (code, type_in, optab_default);
3444 vec_mode = TYPE_MODE (type_in);
3445 if (!optab
3446 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3447 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3448 return;
3449 }
3450
3451 /* Found a vectorizable pattern. */
3452 if (dump_enabled_p ())
3453 {
3454 dump_printf_loc (MSG_NOTE, vect_location,
3455 "pattern recognized: ");
3456 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3457 dump_printf (MSG_NOTE, "\n");
3458 }
3459
3460 /* Mark the stmts that are involved in the pattern. */
3461 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3462
3463 /* Patterns cannot be vectorized using SLP, because they change the order of
3464 computation. */
3465 if (loop_vinfo)
3466 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3467 if (next == stmt)
3468 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3469
3470 /* It is possible that additional pattern stmts are created and inserted in
3471 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3472 relevant statements. */
3473 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3474 && (unsigned) i < (stmts_to_replace->length () - 1);
3475 i++)
3476 {
3477 stmt_info = vinfo_for_stmt (stmt);
3478 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3479 if (dump_enabled_p ())
3480 {
3481 dump_printf_loc (MSG_NOTE, vect_location,
3482 "additional pattern stmt: ");
3483 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3484 dump_printf (MSG_NOTE, "\n");
3485 }
3486
3487 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3488 }
3489 }
3490
3491
3492 /* Function vect_pattern_recog
3493
3494 Input:
3495 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
3496 computation idioms.
3497
3498 Output - for each computation idiom that is detected we create a new stmt
3499 that provides the same functionality and that can be vectorized. We
3500 also record some information in the stmt_vec_info of the relevant
3501 stmts, as explained below:
3502
3503 At the entry to this function we have the following stmts, with the
3504 following initial value in the STMT_VINFO fields:
3505
3506 stmt in_pattern_p related_stmt vec_stmt
3507 S1: a_i = .... - - -
3508 S2: a_2 = ..use(a_i).. - - -
3509 S3: a_1 = ..use(a_2).. - - -
3510 S4: a_0 = ..use(a_1).. - - -
3511 S5: ... = ..use(a_0).. - - -
3512
3513 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3514 represented by a single stmt. We then:
3515 - create a new stmt S6 equivalent to the pattern (the stmt is not
3516 inserted into the code)
3517 - fill in the STMT_VINFO fields as follows:
3518
3519 in_pattern_p related_stmt vec_stmt
3520 S1: a_i = .... - - -
3521 S2: a_2 = ..use(a_i).. - - -
3522 S3: a_1 = ..use(a_2).. - - -
3523 S4: a_0 = ..use(a_1).. true S6 -
3524 '---> S6: a_new = .... - S4 -
3525 S5: ... = ..use(a_0).. - - -
3526
3527 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3528 to each other through the RELATED_STMT field).
3529
3530 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3531 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3532 remain irrelevant unless used by stmts other than S4.
3533
3534 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3535 (because they are marked as irrelevant). It will vectorize S6, and record
3536 a pointer to the new vector stmt VS6 from S6 (as usual).
3537 S4 will be skipped, and S5 will be vectorized as usual:
3538
3539 in_pattern_p related_stmt vec_stmt
3540 S1: a_i = .... - - -
3541 S2: a_2 = ..use(a_i).. - - -
3542 S3: a_1 = ..use(a_2).. - - -
3543 > VS6: va_new = .... - - -
3544 S4: a_0 = ..use(a_1).. true S6 VS6
3545 '---> S6: a_new = .... - S4 VS6
3546 > VS5: ... = ..vuse(va_new).. - - -
3547 S5: ... = ..use(a_0).. - - -
3548
3549 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3550 elsewhere), and we'll end up with:
3551
3552 VS6: va_new = ....
3553 VS5: ... = ..vuse(va_new)..
3554
3555 In case of more than one pattern statements, e.g., widen-mult with
3556 intermediate type:
3557
3558 S1 a_t = ;
3559 S2 a_T = (TYPE) a_t;
3560 '--> S3: a_it = (interm_type) a_t;
3561 S4 prod_T = a_T * CONST;
3562 '--> S5: prod_T' = a_it w* CONST;
3563
3564 there may be other users of a_T outside the pattern. In that case S2 will
3565 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3566 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3567 be recorded in S3. */
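
/* A minimal C sketch of the widen-mult case above (illustrative only):

     char a8[256];
     int prod[256];
     for (int i = 0; i < 256; i++)
       prod[i] = (int) a8[i] * 3;

   S3 first converts a8[i] to a 16-bit a_it, and S5 then forms the
   product with a short-to-int widening multiply; any other users of
   the (int) a8[i] value keep the original S2 conversion. */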
3568
3569 void
3570 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3571 {
3572 struct loop *loop;
3573 basic_block *bbs;
3574 unsigned int nbbs;
3575 gimple_stmt_iterator si;
3576 unsigned int i, j;
3577 vect_recog_func_ptr vect_recog_func;
3578 auto_vec<gimple, 1> stmts_to_replace;
3579 gimple stmt;
3580
3581 if (dump_enabled_p ())
3582 dump_printf_loc (MSG_NOTE, vect_location,
3583 "=== vect_pattern_recog ===\n");
3584
3585 if (loop_vinfo)
3586 {
3587 loop = LOOP_VINFO_LOOP (loop_vinfo);
3588 bbs = LOOP_VINFO_BBS (loop_vinfo);
3589 nbbs = loop->num_nodes;
3590 }
3591 else
3592 {
3593 bbs = &BB_VINFO_BB (bb_vinfo);
3594 nbbs = 1;
3595 }
3596
3597 /* Scan through the loop stmts, applying the pattern recognition
3598 functions starting at each stmt visited: */
3599 for (i = 0; i < nbbs; i++)
3600 {
3601 basic_block bb = bbs[i];
3602 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3603 {
3604 if (bb_vinfo && (stmt = gsi_stmt (si))
3605 && vinfo_for_stmt (stmt)
3606 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3607 continue;
3608
3609 /* Scan over all generic vect_recog_xxx_pattern functions. */
3610 for (j = 0; j < NUM_PATTERNS; j++)
3611 {
3612 vect_recog_func = vect_vect_recog_func_ptrs[j];
3613 vect_pattern_recog_1 (vect_recog_func, si,
3614 &stmts_to_replace);
3615 }
3616 }
3617 }
3618 }