]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/tree-vect-patterns.c
Update copyright years.
[thirdparty/gcc.git] / gcc / tree-vect-patterns.c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2016 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44
45 /* Pattern recognition functions */
46 static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
47 tree *);
48 static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
49 tree *);
50 static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
51 tree *);
52 static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
53 tree *);
54 static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
55 static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
56 tree *);
57 static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
58 tree *, tree *);
59 static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
60 static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
61 tree *, tree *);
62 static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
63 tree *, tree *);
64
65 static gimple *vect_recog_mult_pattern (vec<gimple *> *,
66 tree *, tree *);
67
68 static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
69 tree *, tree *);
70 static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
71 static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *, tree *);
72
73 struct vect_recog_func
74 {
75 vect_recog_func_ptr fn;
76 const char *name;
77 };
78 static vect_recog_func vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
79 { vect_recog_widen_mult_pattern, "widen_mult" },
80 { vect_recog_widen_sum_pattern, "widen_sum" },
81 { vect_recog_dot_prod_pattern, "dot_prod" },
82 { vect_recog_sad_pattern, "sad" },
83 { vect_recog_pow_pattern, "pow" },
84 { vect_recog_widen_shift_pattern, "widen_shift" },
85 { vect_recog_over_widening_pattern, "over_widening" },
86 { vect_recog_rotate_pattern, "rotate" },
87 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
88 { vect_recog_divmod_pattern, "divmod" },
89 { vect_recog_mult_pattern, "mult" },
90 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
91 { vect_recog_bool_pattern, "bool" },
92 { vect_recog_mask_conversion_pattern, "mask_conversion" }
93 };
94
95 static inline void
96 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
97 {
98 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
99 stmt);
100 }
101
102 static inline void
103 new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
104 {
105 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
106 append_pattern_def_seq (stmt_info, stmt);
107 }
108
109 /* Check whether STMT2 is in the same loop or basic block as STMT1.
110 Which of the two applies depends on whether we're currently doing
111 loop-based or basic-block-based vectorization, as determined by
112 the vinfo_for_stmt for STMT1 (which must be defined).
113
114 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
115 to be defined as well. */
116
117 static bool
118 vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
119 {
120 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
121 return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
122 }
123
124 /* If the LHS of DEF_STMT has a single use, and that statement is
125 in the same loop or basic block, return it. */
126
127 static gimple *
128 vect_single_imm_use (gimple *def_stmt)
129 {
130 tree lhs = gimple_assign_lhs (def_stmt);
131 use_operand_p use_p;
132 gimple *use_stmt;
133
134 if (!single_imm_use (lhs, &use_p, &use_stmt))
135 return NULL;
136
137 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
138 return NULL;
139
140 return use_stmt;
141 }
142
143 /* Check whether NAME, an ssa-name used in USE_STMT,
144 is a result of a type promotion, such that:
145 DEF_STMT: NAME = NOP (name0)
146 If CHECK_SIGN is TRUE, check that either both types are signed or both are
147 unsigned. */
148
149 static bool
150 type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
151 tree *orig_type, gimple **def_stmt, bool *promotion)
152 {
153 gimple *dummy_gimple;
154 stmt_vec_info stmt_vinfo;
155 tree type = TREE_TYPE (name);
156 tree oprnd0;
157 enum vect_def_type dt;
158
159 stmt_vinfo = vinfo_for_stmt (use_stmt);
160 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
161 return false;
162
163 if (dt != vect_internal_def
164 && dt != vect_external_def && dt != vect_constant_def)
165 return false;
166
167 if (!*def_stmt)
168 return false;
169
170 if (!is_gimple_assign (*def_stmt))
171 return false;
172
173 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
174 return false;
175
176 oprnd0 = gimple_assign_rhs1 (*def_stmt);
177
178 *orig_type = TREE_TYPE (oprnd0);
179 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
180 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
181 return false;
182
183 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
184 *promotion = true;
185 else
186 *promotion = false;
187
188 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
189 return false;
190
191 return true;
192 }
193
194 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
195 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
196
197 static tree
198 vect_recog_temp_ssa_var (tree type, gimple *stmt)
199 {
200 return make_temp_ssa_name (type, stmt, "patt");
201 }
202
203 /* Function vect_recog_dot_prod_pattern
204
205 Try to find the following pattern:
206
207 type x_t, y_t;
208 TYPE1 prod;
209 TYPE2 sum = init;
210 loop:
211 sum_0 = phi <init, sum_1>
212 S1 x_t = ...
213 S2 y_t = ...
214 S3 x_T = (TYPE1) x_t;
215 S4 y_T = (TYPE1) y_t;
216 S5 prod = x_T * y_T;
217 [S6 prod = (TYPE2) prod; #optional]
218 S7 sum_1 = prod + sum_0;
219
220 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
221 same size of 'TYPE1' or bigger. This is a special case of a reduction
222 computation.
223
224 Input:
225
226 * STMTS: Contains a stmt from which the pattern search begins. In the
227 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
228 will be detected.
229
230 Output:
231
232 * TYPE_IN: The type of the input arguments to the pattern.
233
234 * TYPE_OUT: The type of the output of this pattern.
235
236 * Return value: A new stmt that will be used to replace the sequence of
237 stmts that constitute the pattern. In this case it will be:
238 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
239
240 Note: The dot-prod idiom is a widening reduction pattern that is
241 vectorized without preserving all the intermediate results. It
242 produces only N/2 (widened) results (by summing up pairs of
243 intermediate results) rather than all N results. Therefore, we
244 cannot allow this pattern when we want to get all the results and in
245 the correct order (as is the case when this computation is in an
246 inner-loop nested in an outer-loop that us being vectorized). */
247
248 static gimple *
249 vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
250 tree *type_out)
251 {
252 gimple *stmt, *last_stmt = (*stmts)[0];
253 tree oprnd0, oprnd1;
254 tree oprnd00, oprnd01;
255 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
256 tree type, half_type;
257 gimple *pattern_stmt;
258 tree prod_type;
259 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
260 struct loop *loop;
261 tree var;
262 bool promotion;
263
264 if (!loop_info)
265 return NULL;
266
267 loop = LOOP_VINFO_LOOP (loop_info);
268
269 /* We don't allow changing the order of the computation in the inner-loop
270 when doing outer-loop vectorization. */
271 if (loop && nested_in_vect_loop_p (loop, last_stmt))
272 return NULL;
273
274 if (!is_gimple_assign (last_stmt))
275 return NULL;
276
277 type = gimple_expr_type (last_stmt);
278
279 /* Look for the following pattern
280 DX = (TYPE1) X;
281 DY = (TYPE1) Y;
282 DPROD = DX * DY;
283 DDPROD = (TYPE2) DPROD;
284 sum_1 = DDPROD + sum_0;
285 In which
286 - DX is double the size of X
287 - DY is double the size of Y
288 - DX, DY, DPROD all have the same type
289 - sum is the same size of DPROD or bigger
290 - sum has been recognized as a reduction variable.
291
292 This is equivalent to:
293 DPROD = X w* Y; #widen mult
294 sum_1 = DPROD w+ sum_0; #widen summation
295 or
296 DPROD = X w* Y; #widen mult
297 sum_1 = DPROD + sum_0; #summation
298 */
299
300 /* Starting from LAST_STMT, follow the defs of its uses in search
301 of the above pattern. */
302
303 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
304 return NULL;
305
306 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
307 {
308 /* Has been detected as widening-summation? */
309
310 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
311 type = gimple_expr_type (stmt);
312 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
313 return NULL;
314 oprnd0 = gimple_assign_rhs1 (stmt);
315 oprnd1 = gimple_assign_rhs2 (stmt);
316 half_type = TREE_TYPE (oprnd0);
317 }
318 else
319 {
320 gimple *def_stmt;
321
322 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
323 && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
324 return NULL;
325 oprnd0 = gimple_assign_rhs1 (last_stmt);
326 oprnd1 = gimple_assign_rhs2 (last_stmt);
327 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
328 || !types_compatible_p (TREE_TYPE (oprnd1), type))
329 return NULL;
330 stmt = last_stmt;
331
332 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
333 &promotion)
334 && promotion)
335 {
336 stmt = def_stmt;
337 oprnd0 = gimple_assign_rhs1 (stmt);
338 }
339 else
340 half_type = type;
341 }
342
343 /* So far so good. Since last_stmt was detected as a (summation) reduction,
344 we know that oprnd1 is the reduction variable (defined by a loop-header
345 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
346 Left to check that oprnd0 is defined by a (widen_)mult_expr */
347 if (TREE_CODE (oprnd0) != SSA_NAME)
348 return NULL;
349
350 prod_type = half_type;
351 stmt = SSA_NAME_DEF_STMT (oprnd0);
352
353 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
354 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
355 return NULL;
356
357 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
358 inside the loop (in case we are analyzing an outer-loop). */
359 if (!is_gimple_assign (stmt))
360 return NULL;
361 stmt_vinfo = vinfo_for_stmt (stmt);
362 gcc_assert (stmt_vinfo);
363 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
364 return NULL;
365 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
366 return NULL;
367 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
368 {
369 /* Has been detected as a widening multiplication? */
370
371 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
372 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
373 return NULL;
374 stmt_vinfo = vinfo_for_stmt (stmt);
375 gcc_assert (stmt_vinfo);
376 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
377 oprnd00 = gimple_assign_rhs1 (stmt);
378 oprnd01 = gimple_assign_rhs2 (stmt);
379 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
380 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
381 }
382 else
383 {
384 tree half_type0, half_type1;
385 gimple *def_stmt;
386 tree oprnd0, oprnd1;
387
388 oprnd0 = gimple_assign_rhs1 (stmt);
389 oprnd1 = gimple_assign_rhs2 (stmt);
390 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
391 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
392 return NULL;
393 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
394 &promotion)
395 || !promotion)
396 return NULL;
397 oprnd00 = gimple_assign_rhs1 (def_stmt);
398 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
399 &promotion)
400 || !promotion)
401 return NULL;
402 oprnd01 = gimple_assign_rhs1 (def_stmt);
403 if (!types_compatible_p (half_type0, half_type1))
404 return NULL;
405 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
406 return NULL;
407 }
408
409 half_type = TREE_TYPE (oprnd00);
410 *type_in = half_type;
411 *type_out = type;
412
413 /* Pattern detected. Create a stmt to be used to replace the pattern: */
414 var = vect_recog_temp_ssa_var (type, NULL);
415 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
416 oprnd00, oprnd01, oprnd1);
417
418 if (dump_enabled_p ())
419 {
420 dump_printf_loc (MSG_NOTE, vect_location,
421 "vect_recog_dot_prod_pattern: detected: ");
422 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
423 dump_printf (MSG_NOTE, "\n");
424 }
425
426 return pattern_stmt;
427 }
428
429
430 /* Function vect_recog_sad_pattern
431
432 Try to find the following Sum of Absolute Difference (SAD) pattern:
433
434 type x_t, y_t;
435 signed TYPE1 diff, abs_diff;
436 TYPE2 sum = init;
437 loop:
438 sum_0 = phi <init, sum_1>
439 S1 x_t = ...
440 S2 y_t = ...
441 S3 x_T = (TYPE1) x_t;
442 S4 y_T = (TYPE1) y_t;
443 S5 diff = x_T - y_T;
444 S6 abs_diff = ABS_EXPR <diff>;
445 [S7 abs_diff = (TYPE2) abs_diff; #optional]
446 S8 sum_1 = abs_diff + sum_0;
447
448 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
449 same size of 'TYPE1' or bigger. This is a special case of a reduction
450 computation.
451
452 Input:
453
454 * STMTS: Contains a stmt from which the pattern search begins. In the
455 example, when this function is called with S8, the pattern
456 {S3,S4,S5,S6,S7,S8} will be detected.
457
458 Output:
459
460 * TYPE_IN: The type of the input arguments to the pattern.
461
462 * TYPE_OUT: The type of the output of this pattern.
463
464 * Return value: A new stmt that will be used to replace the sequence of
465 stmts that constitute the pattern. In this case it will be:
466 SAD_EXPR <x_t, y_t, sum_0>
467 */
468
469 static gimple *
470 vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
471 tree *type_out)
472 {
473 gimple *last_stmt = (*stmts)[0];
474 tree sad_oprnd0, sad_oprnd1;
475 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
476 tree half_type;
477 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
478 struct loop *loop;
479 bool promotion;
480
481 if (!loop_info)
482 return NULL;
483
484 loop = LOOP_VINFO_LOOP (loop_info);
485
486 /* We don't allow changing the order of the computation in the inner-loop
487 when doing outer-loop vectorization. */
488 if (loop && nested_in_vect_loop_p (loop, last_stmt))
489 return NULL;
490
491 if (!is_gimple_assign (last_stmt))
492 return NULL;
493
494 tree sum_type = gimple_expr_type (last_stmt);
495
496 /* Look for the following pattern
497 DX = (TYPE1) X;
498 DY = (TYPE1) Y;
499 DDIFF = DX - DY;
500 DAD = ABS_EXPR <DDIFF>;
501 DDPROD = (TYPE2) DPROD;
502 sum_1 = DAD + sum_0;
503 In which
504 - DX is at least double the size of X
505 - DY is at least double the size of Y
506 - DX, DY, DDIFF, DAD all have the same type
507 - sum is the same size of DAD or bigger
508 - sum has been recognized as a reduction variable.
509
510 This is equivalent to:
511 DDIFF = X w- Y; #widen sub
512 DAD = ABS_EXPR <DDIFF>;
513 sum_1 = DAD w+ sum_0; #widen summation
514 or
515 DDIFF = X w- Y; #widen sub
516 DAD = ABS_EXPR <DDIFF>;
517 sum_1 = DAD + sum_0; #summation
518 */
519
520 /* Starting from LAST_STMT, follow the defs of its uses in search
521 of the above pattern. */
522
523 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
524 return NULL;
525
526 tree plus_oprnd0, plus_oprnd1;
527
528 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
529 {
530 /* Has been detected as widening-summation? */
531
532 gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
533 sum_type = gimple_expr_type (stmt);
534 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
535 return NULL;
536 plus_oprnd0 = gimple_assign_rhs1 (stmt);
537 plus_oprnd1 = gimple_assign_rhs2 (stmt);
538 half_type = TREE_TYPE (plus_oprnd0);
539 }
540 else
541 {
542 gimple *def_stmt;
543
544 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
545 && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
546 return NULL;
547 plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
548 plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
549 if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
550 || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
551 return NULL;
552
553 /* The type conversion could be promotion, demotion,
554 or just signed -> unsigned. */
555 if (type_conversion_p (plus_oprnd0, last_stmt, false,
556 &half_type, &def_stmt, &promotion))
557 plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
558 else
559 half_type = sum_type;
560 }
561
562 /* So far so good. Since last_stmt was detected as a (summation) reduction,
563 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
564 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
565 Then check that plus_oprnd0 is defined by an abs_expr. */
566
567 if (TREE_CODE (plus_oprnd0) != SSA_NAME)
568 return NULL;
569
570 tree abs_type = half_type;
571 gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
572
573 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
574 if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
575 return NULL;
576
577 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
578 inside the loop (in case we are analyzing an outer-loop). */
579 if (!is_gimple_assign (abs_stmt))
580 return NULL;
581
582 stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
583 gcc_assert (abs_stmt_vinfo);
584 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
585 return NULL;
586 if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
587 return NULL;
588
589 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
590 if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
591 return NULL;
592 if (TYPE_UNSIGNED (abs_type))
593 return NULL;
594
595 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
596
597 if (TREE_CODE (abs_oprnd) != SSA_NAME)
598 return NULL;
599
600 gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
601
602 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
603 if (!gimple_bb (diff_stmt)
604 || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
605 return NULL;
606
607 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
608 inside the loop (in case we are analyzing an outer-loop). */
609 if (!is_gimple_assign (diff_stmt))
610 return NULL;
611
612 stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
613 gcc_assert (diff_stmt_vinfo);
614 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
615 return NULL;
616 if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
617 return NULL;
618
619 tree half_type0, half_type1;
620 gimple *def_stmt;
621
622 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
623 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
624
625 if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
626 || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
627 return NULL;
628 if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
629 &half_type0, &def_stmt, &promotion)
630 || !promotion)
631 return NULL;
632 sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
633
634 if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
635 &half_type1, &def_stmt, &promotion)
636 || !promotion)
637 return NULL;
638 sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
639
640 if (!types_compatible_p (half_type0, half_type1))
641 return NULL;
642 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
643 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
644 return NULL;
645
646 *type_in = TREE_TYPE (sad_oprnd0);
647 *type_out = sum_type;
648
649 /* Pattern detected. Create a stmt to be used to replace the pattern: */
650 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
651 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
652 sad_oprnd1, plus_oprnd1);
653
654 if (dump_enabled_p ())
655 {
656 dump_printf_loc (MSG_NOTE, vect_location,
657 "vect_recog_sad_pattern: detected: ");
658 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
659 dump_printf (MSG_NOTE, "\n");
660 }
661
662 return pattern_stmt;
663 }
664
665
666 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
667 and LSHIFT_EXPR.
668
669 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
670 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
671
672 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
673 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
674 that satisfies the above restrictions, we can perform a widening opeartion
675 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
676 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */
677
678 static bool
679 vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
680 tree const_oprnd, tree *oprnd,
681 gimple **wstmt, tree type,
682 tree *half_type, gimple *def_stmt)
683 {
684 tree new_type, new_oprnd;
685
686 if (code != MULT_EXPR && code != LSHIFT_EXPR)
687 return false;
688
689 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
690 || (code == LSHIFT_EXPR
691 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
692 != 1))
693 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
694 {
695 /* CONST_OPRND is a constant of HALF_TYPE. */
696 *oprnd = gimple_assign_rhs1 (def_stmt);
697 return true;
698 }
699
700 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
701 return false;
702
703 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
704 return false;
705
706 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
707 a type 2 times bigger than HALF_TYPE. */
708 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
709 TYPE_UNSIGNED (type));
710 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
711 || (code == LSHIFT_EXPR
712 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
713 return false;
714
715 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
716 *oprnd = gimple_assign_rhs1 (def_stmt);
717 new_oprnd = make_ssa_name (new_type);
718 *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
719 *oprnd = new_oprnd;
720
721 *half_type = new_type;
722 return true;
723 }
724
725
726 /* Function vect_recog_widen_mult_pattern
727
728 Try to find the following pattern:
729
730 type1 a_t;
731 type2 b_t;
732 TYPE a_T, b_T, prod_T;
733
734 S1 a_t = ;
735 S2 b_t = ;
736 S3 a_T = (TYPE) a_t;
737 S4 b_T = (TYPE) b_t;
738 S5 prod_T = a_T * b_T;
739
740 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
741
742 Also detect unsigned cases:
743
744 unsigned type1 a_t;
745 unsigned type2 b_t;
746 unsigned TYPE u_prod_T;
747 TYPE a_T, b_T, prod_T;
748
749 S1 a_t = ;
750 S2 b_t = ;
751 S3 a_T = (TYPE) a_t;
752 S4 b_T = (TYPE) b_t;
753 S5 prod_T = a_T * b_T;
754 S6 u_prod_T = (unsigned TYPE) prod_T;
755
756 and multiplication by constants:
757
758 type a_t;
759 TYPE a_T, prod_T;
760
761 S1 a_t = ;
762 S3 a_T = (TYPE) a_t;
763 S5 prod_T = a_T * CONST;
764
765 A special case of multiplication by constants is when 'TYPE' is 4 times
766 bigger than 'type', but CONST fits an intermediate type 2 times smaller
767 than 'TYPE'. In that case we create an additional pattern stmt for S3
768 to create a variable of the intermediate type, and perform widen-mult
769 on the intermediate type as well:
770
771 type a_t;
772 interm_type a_it;
773 TYPE a_T, prod_T, prod_T';
774
775 S1 a_t = ;
776 S3 a_T = (TYPE) a_t;
777 '--> a_it = (interm_type) a_t;
778 S5 prod_T = a_T * CONST;
779 '--> prod_T' = a_it w* CONST;
780
781 Input/Output:
782
783 * STMTS: Contains a stmt from which the pattern search begins. In the
784 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
785 is detected. In case of unsigned widen-mult, the original stmt (S5) is
786 replaced with S6 in STMTS. In case of multiplication by a constant
787 of an intermediate type (the last case above), STMTS also contains S3
788 (inserted before S5).
789
790 Output:
791
792 * TYPE_IN: The type of the input arguments to the pattern.
793
794 * TYPE_OUT: The type of the output of this pattern.
795
796 * Return value: A new stmt that will be used to replace the sequence of
797 stmts that constitute the pattern. In this case it will be:
798 WIDEN_MULT <a_t, b_t>
799 If the result of WIDEN_MULT needs to be converted to a larger type, the
800 returned stmt will be this type conversion stmt.
801 */
802
803 static gimple *
804 vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
805 tree *type_in, tree *type_out)
806 {
807 gimple *last_stmt = stmts->pop ();
808 gimple *def_stmt0, *def_stmt1;
809 tree oprnd0, oprnd1;
810 tree type, half_type0, half_type1;
811 gimple *new_stmt = NULL, *pattern_stmt = NULL;
812 tree vectype, vecitype;
813 tree var;
814 enum tree_code dummy_code;
815 int dummy_int;
816 vec<tree> dummy_vec;
817 bool op1_ok;
818 bool promotion;
819
820 if (!is_gimple_assign (last_stmt))
821 return NULL;
822
823 type = gimple_expr_type (last_stmt);
824
825 /* Starting from LAST_STMT, follow the defs of its uses in search
826 of the above pattern. */
827
828 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
829 return NULL;
830
831 oprnd0 = gimple_assign_rhs1 (last_stmt);
832 oprnd1 = gimple_assign_rhs2 (last_stmt);
833 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
834 || !types_compatible_p (TREE_TYPE (oprnd1), type))
835 return NULL;
836
837 /* Check argument 0. */
838 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
839 &promotion)
840 || !promotion)
841 return NULL;
842 /* Check argument 1. */
843 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
844 &def_stmt1, &promotion);
845
846 if (op1_ok && promotion)
847 {
848 oprnd0 = gimple_assign_rhs1 (def_stmt0);
849 oprnd1 = gimple_assign_rhs1 (def_stmt1);
850 }
851 else
852 {
853 if (TREE_CODE (oprnd1) == INTEGER_CST
854 && TREE_CODE (half_type0) == INTEGER_TYPE
855 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
856 &oprnd0, &new_stmt, type,
857 &half_type0, def_stmt0))
858 {
859 half_type1 = half_type0;
860 oprnd1 = fold_convert (half_type1, oprnd1);
861 }
862 else
863 return NULL;
864 }
865
866 /* If the two arguments have different sizes, convert the one with
867 the smaller type into the larger type. */
868 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
869 {
870 /* If we already used up the single-stmt slot give up. */
871 if (new_stmt)
872 return NULL;
873
874 tree* oprnd = NULL;
875 gimple *def_stmt = NULL;
876
877 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
878 {
879 def_stmt = def_stmt0;
880 half_type0 = half_type1;
881 oprnd = &oprnd0;
882 }
883 else
884 {
885 def_stmt = def_stmt1;
886 half_type1 = half_type0;
887 oprnd = &oprnd1;
888 }
889
890 tree old_oprnd = gimple_assign_rhs1 (def_stmt);
891 tree new_oprnd = make_ssa_name (half_type0);
892 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
893 *oprnd = new_oprnd;
894 }
895
896 /* Handle unsigned case. Look for
897 S6 u_prod_T = (unsigned TYPE) prod_T;
898 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
899 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
900 {
901 gimple *use_stmt;
902 tree use_lhs;
903 tree use_type;
904
905 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
906 return NULL;
907
908 use_stmt = vect_single_imm_use (last_stmt);
909 if (!use_stmt || !is_gimple_assign (use_stmt)
910 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
911 return NULL;
912
913 use_lhs = gimple_assign_lhs (use_stmt);
914 use_type = TREE_TYPE (use_lhs);
915 if (!INTEGRAL_TYPE_P (use_type)
916 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
917 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
918 return NULL;
919
920 type = use_type;
921 last_stmt = use_stmt;
922 }
923
924 if (!types_compatible_p (half_type0, half_type1))
925 return NULL;
926
927 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
928 to get an intermediate result of type ITYPE. In this case we need
929 to build a statement to convert this intermediate result to type TYPE. */
930 tree itype = type;
931 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
932 itype = build_nonstandard_integer_type
933 (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
934 TYPE_UNSIGNED (type));
935
936 /* Pattern detected. */
937 if (dump_enabled_p ())
938 dump_printf_loc (MSG_NOTE, vect_location,
939 "vect_recog_widen_mult_pattern: detected:\n");
940
941 /* Check target support */
942 vectype = get_vectype_for_scalar_type (half_type0);
943 vecitype = get_vectype_for_scalar_type (itype);
944 if (!vectype
945 || !vecitype
946 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
947 vecitype, vectype,
948 &dummy_code, &dummy_code,
949 &dummy_int, &dummy_vec))
950 return NULL;
951
952 *type_in = vectype;
953 *type_out = get_vectype_for_scalar_type (type);
954
955 /* Pattern supported. Create a stmt to be used to replace the pattern: */
956 var = vect_recog_temp_ssa_var (itype, NULL);
957 pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);
958
959 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
960 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
961
962 /* If the original two operands have different sizes, we may need to convert
963 the smaller one into the larget type. If this is the case, at this point
964 the new stmt is already built. */
965 if (new_stmt)
966 {
967 append_pattern_def_seq (stmt_vinfo, new_stmt);
968 stmt_vec_info new_stmt_info
969 = new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
970 set_vinfo_for_stmt (new_stmt, new_stmt_info);
971 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
972 }
973
974 /* If ITYPE is not TYPE, we need to build a type convertion stmt to convert
975 the result of the widen-mult operation into type TYPE. */
976 if (itype != type)
977 {
978 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
979 stmt_vec_info pattern_stmt_info
980 = new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
981 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
982 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
983 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
984 NOP_EXPR,
985 gimple_assign_lhs (pattern_stmt));
986 }
987
988 if (dump_enabled_p ())
989 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
990
991 stmts->safe_push (last_stmt);
992 return pattern_stmt;
993 }
994
995
996 /* Function vect_recog_pow_pattern
997
998 Try to find the following pattern:
999
1000 x = POW (y, N);
1001
1002 with POW being one of pow, powf, powi, powif and N being
1003 either 2 or 0.5.
1004
1005 Input:
1006
1007 * LAST_STMT: A stmt from which the pattern search begins.
1008
1009 Output:
1010
1011 * TYPE_IN: The type of the input arguments to the pattern.
1012
1013 * TYPE_OUT: The type of the output of this pattern.
1014
1015 * Return value: A new stmt that will be used to replace the sequence of
1016 stmts that constitute the pattern. In this case it will be:
1017 x = x * x
1018 or
1019 x = sqrt (x)
1020 */
1021
1022 static gimple *
1023 vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
1024 tree *type_out)
1025 {
1026 gimple *last_stmt = (*stmts)[0];
1027 tree base, exp = NULL;
1028 gimple *stmt;
1029 tree var;
1030
1031 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1032 return NULL;
1033
1034 switch (gimple_call_combined_fn (last_stmt))
1035 {
1036 CASE_CFN_POW:
1037 CASE_CFN_POWI:
1038 base = gimple_call_arg (last_stmt, 0);
1039 exp = gimple_call_arg (last_stmt, 1);
1040 if (TREE_CODE (exp) != REAL_CST
1041 && TREE_CODE (exp) != INTEGER_CST)
1042 return NULL;
1043 break;
1044
1045 default:
1046 return NULL;
1047 }
1048
1049 /* We now have a pow or powi builtin function call with a constant
1050 exponent. */
1051
1052 *type_out = NULL_TREE;
1053
1054 /* Catch squaring. */
1055 if ((tree_fits_shwi_p (exp)
1056 && tree_to_shwi (exp) == 2)
1057 || (TREE_CODE (exp) == REAL_CST
1058 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1059 {
1060 *type_in = TREE_TYPE (base);
1061
1062 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1063 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1064 return stmt;
1065 }
1066
1067 /* Catch square root. */
1068 if (TREE_CODE (exp) == REAL_CST
1069 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1070 {
1071 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
1072 if (*type_in
1073 && direct_internal_fn_supported_p (IFN_SQRT, *type_in,
1074 OPTIMIZE_FOR_SPEED))
1075 {
1076 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1077 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1078 gimple_call_set_lhs (stmt, var);
1079 return stmt;
1080 }
1081 }
1082
1083 return NULL;
1084 }
1085
1086
1087 /* Function vect_recog_widen_sum_pattern
1088
1089 Try to find the following pattern:
1090
1091 type x_t;
1092 TYPE x_T, sum = init;
1093 loop:
1094 sum_0 = phi <init, sum_1>
1095 S1 x_t = *p;
1096 S2 x_T = (TYPE) x_t;
1097 S3 sum_1 = x_T + sum_0;
1098
1099 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1100 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1101 a special case of a reduction computation.
1102
1103 Input:
1104
1105 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1106 when this function is called with S3, the pattern {S2,S3} will be detected.
1107
1108 Output:
1109
1110 * TYPE_IN: The type of the input arguments to the pattern.
1111
1112 * TYPE_OUT: The type of the output of this pattern.
1113
1114 * Return value: A new stmt that will be used to replace the sequence of
1115 stmts that constitute the pattern. In this case it will be:
1116 WIDEN_SUM <x_t, sum_0>
1117
1118 Note: The widening-sum idiom is a widening reduction pattern that is
1119 vectorized without preserving all the intermediate results. It
1120 produces only N/2 (widened) results (by summing up pairs of
1121 intermediate results) rather than all N results. Therefore, we
1122 cannot allow this pattern when we want to get all the results and in
1123 the correct order (as is the case when this computation is in an
1124 inner-loop nested in an outer-loop that us being vectorized). */
1125
1126 static gimple *
1127 vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
1128 tree *type_out)
1129 {
1130 gimple *stmt, *last_stmt = (*stmts)[0];
1131 tree oprnd0, oprnd1;
1132 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1133 tree type, half_type;
1134 gimple *pattern_stmt;
1135 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1136 struct loop *loop;
1137 tree var;
1138 bool promotion;
1139
1140 if (!loop_info)
1141 return NULL;
1142
1143 loop = LOOP_VINFO_LOOP (loop_info);
1144
1145 /* We don't allow changing the order of the computation in the inner-loop
1146 when doing outer-loop vectorization. */
1147 if (loop && nested_in_vect_loop_p (loop, last_stmt))
1148 return NULL;
1149
1150 if (!is_gimple_assign (last_stmt))
1151 return NULL;
1152
1153 type = gimple_expr_type (last_stmt);
1154
1155 /* Look for the following pattern
1156 DX = (TYPE) X;
1157 sum_1 = DX + sum_0;
1158 In which DX is at least double the size of X, and sum_1 has been
1159 recognized as a reduction variable.
1160 */
1161
1162 /* Starting from LAST_STMT, follow the defs of its uses in search
1163 of the above pattern. */
1164
1165 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
1166 return NULL;
1167
1168 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
1169 && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
1170 return NULL;
1171
1172 oprnd0 = gimple_assign_rhs1 (last_stmt);
1173 oprnd1 = gimple_assign_rhs2 (last_stmt);
1174 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
1175 || !types_compatible_p (TREE_TYPE (oprnd1), type))
1176 return NULL;
1177
1178 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1179 we know that oprnd1 is the reduction variable (defined by a loop-header
1180 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1181 Left to check that oprnd0 is defined by a cast from type 'type' to type
1182 'TYPE'. */
1183
1184 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1185 &promotion)
1186 || !promotion)
1187 return NULL;
1188
1189 oprnd0 = gimple_assign_rhs1 (stmt);
1190 *type_in = half_type;
1191 *type_out = type;
1192
1193 /* Pattern detected. Create a stmt to be used to replace the pattern: */
1194 var = vect_recog_temp_ssa_var (type, NULL);
1195 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);
1196
1197 if (dump_enabled_p ())
1198 {
1199 dump_printf_loc (MSG_NOTE, vect_location,
1200 "vect_recog_widen_sum_pattern: detected: ");
1201 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1202 dump_printf (MSG_NOTE, "\n");
1203 }
1204
1205 return pattern_stmt;
1206 }
1207
1208
1209 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1210
1211 Input:
1212 STMT - a statement to check.
1213 DEF - we support operations with two operands, one of which is constant.
1214 The other operand can be defined by a demotion operation, or by a
1215 previous statement in a sequence of over-promoted operations. In the
1216 later case DEF is used to replace that operand. (It is defined by a
1217 pattern statement we created for the previous statement in the
1218 sequence).
1219
1220 Input/output:
1221 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1222 NULL, it's the type of DEF.
1223 STMTS - additional pattern statements. If a pattern statement (type
1224 conversion) is created in this function, its original statement is
1225 added to STMTS.
1226
1227 Output:
1228 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1229 operands to use in the new pattern statement for STMT (will be created
1230 in vect_recog_over_widening_pattern ()).
1231 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1232 statements for STMT: the first one is a type promotion and the second
1233 one is the operation itself. We return the type promotion statement
1234 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1235 the second pattern statement. */
1236
1237 static bool
1238 vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
1239 tree *op0, tree *op1, gimple **new_def_stmt,
1240 vec<gimple *> *stmts)
1241 {
1242 enum tree_code code;
1243 tree const_oprnd, oprnd;
1244 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
1245 gimple *def_stmt, *new_stmt;
1246 bool first = false;
1247 bool promotion;
1248
1249 *op0 = NULL_TREE;
1250 *op1 = NULL_TREE;
1251 *new_def_stmt = NULL;
1252
1253 if (!is_gimple_assign (stmt))
1254 return false;
1255
1256 code = gimple_assign_rhs_code (stmt);
1257 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1258 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1259 return false;
1260
1261 oprnd = gimple_assign_rhs1 (stmt);
1262 const_oprnd = gimple_assign_rhs2 (stmt);
1263 type = gimple_expr_type (stmt);
1264
1265 if (TREE_CODE (oprnd) != SSA_NAME
1266 || TREE_CODE (const_oprnd) != INTEGER_CST)
1267 return false;
1268
1269 /* If oprnd has other uses besides that in stmt we cannot mark it
1270 as being part of a pattern only. */
1271 if (!has_single_use (oprnd))
1272 return false;
1273
1274 /* If we are in the middle of a sequence, we use DEF from a previous
1275 statement. Otherwise, OPRND has to be a result of type promotion. */
1276 if (*new_type)
1277 {
1278 half_type = *new_type;
1279 oprnd = def;
1280 }
1281 else
1282 {
1283 first = true;
1284 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1285 &promotion)
1286 || !promotion
1287 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1288 return false;
1289 }
1290
1291 /* Can we perform the operation on a smaller type? */
1292 switch (code)
1293 {
1294 case BIT_IOR_EXPR:
1295 case BIT_XOR_EXPR:
1296 case BIT_AND_EXPR:
1297 if (!int_fits_type_p (const_oprnd, half_type))
1298 {
1299 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1300 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1301 return false;
1302
1303 interm_type = build_nonstandard_integer_type (
1304 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1305 if (!int_fits_type_p (const_oprnd, interm_type))
1306 return false;
1307 }
1308
1309 break;
1310
1311 case LSHIFT_EXPR:
1312 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1313 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1314 return false;
1315
1316 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1317 (e.g., if the original value was char, the shift amount is at most 8
1318 if we want to use short). */
1319 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1320 return false;
1321
1322 interm_type = build_nonstandard_integer_type (
1323 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1324
1325 if (!vect_supportable_shift (code, interm_type))
1326 return false;
1327
1328 break;
1329
1330 case RSHIFT_EXPR:
1331 if (vect_supportable_shift (code, half_type))
1332 break;
1333
1334 /* Try intermediate type - HALF_TYPE is not supported. */
1335 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1336 return false;
1337
1338 interm_type = build_nonstandard_integer_type (
1339 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1340
1341 if (!vect_supportable_shift (code, interm_type))
1342 return false;
1343
1344 break;
1345
1346 default:
1347 gcc_unreachable ();
1348 }
1349
1350 /* There are four possible cases:
1351 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1352 the first statement in the sequence)
1353 a. The original, HALF_TYPE, is not enough - we replace the promotion
1354 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1355 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1356 promotion.
1357 2. OPRND is defined by a pattern statement we created.
1358 a. Its type is not sufficient for the operation, we create a new stmt:
1359 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1360 this statement in NEW_DEF_STMT, and it is later put in
1361 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1362 b. OPRND is good to use in the new statement. */
1363 if (first)
1364 {
1365 if (interm_type)
1366 {
1367 /* Replace the original type conversion HALF_TYPE->TYPE with
1368 HALF_TYPE->INTERM_TYPE. */
1369 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1370 {
1371 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1372 /* Check if the already created pattern stmt is what we need. */
1373 if (!is_gimple_assign (new_stmt)
1374 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
1375 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1376 return false;
1377
1378 stmts->safe_push (def_stmt);
1379 oprnd = gimple_assign_lhs (new_stmt);
1380 }
1381 else
1382 {
1383 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1384 oprnd = gimple_assign_rhs1 (def_stmt);
1385 new_oprnd = make_ssa_name (interm_type);
1386 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1387 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1388 stmts->safe_push (def_stmt);
1389 oprnd = new_oprnd;
1390 }
1391 }
1392 else
1393 {
1394 /* Retrieve the operand before the type promotion. */
1395 oprnd = gimple_assign_rhs1 (def_stmt);
1396 }
1397 }
1398 else
1399 {
1400 if (interm_type)
1401 {
1402 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1403 new_oprnd = make_ssa_name (interm_type);
1404 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1405 oprnd = new_oprnd;
1406 *new_def_stmt = new_stmt;
1407 }
1408
1409 /* Otherwise, OPRND is already set. */
1410 }
1411
1412 if (interm_type)
1413 *new_type = interm_type;
1414 else
1415 *new_type = half_type;
1416
1417 *op0 = oprnd;
1418 *op1 = fold_convert (*new_type, const_oprnd);
1419
1420 return true;
1421 }
1422
1423
1424 /* Try to find a statement or a sequence of statements that can be performed
1425 on a smaller type:
1426
1427 type x_t;
1428 TYPE x_T, res0_T, res1_T;
1429 loop:
1430 S1 x_t = *p;
1431 S2 x_T = (TYPE) x_t;
1432 S3 res0_T = op (x_T, C0);
1433 S4 res1_T = op (res0_T, C1);
1434 S5 ... = () res1_T; - type demotion
1435
1436 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1437 constants.
1438 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1439 be 'type' or some intermediate type. For now, we expect S5 to be a type
1440 demotion operation. We also check that S3 and S4 have only one use. */
1441
1442 static gimple *
1443 vect_recog_over_widening_pattern (vec<gimple *> *stmts,
1444 tree *type_in, tree *type_out)
1445 {
1446 gimple *stmt = stmts->pop ();
1447 gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
1448 *use_stmt = NULL;
1449 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1450 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1451 bool first;
1452 tree type = NULL;
1453
1454 first = true;
1455 while (1)
1456 {
1457 if (!vinfo_for_stmt (stmt)
1458 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1459 return NULL;
1460
1461 new_def_stmt = NULL;
1462 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1463 &op0, &op1, &new_def_stmt,
1464 stmts))
1465 {
1466 if (first)
1467 return NULL;
1468 else
1469 break;
1470 }
1471
1472 /* STMT can be performed on a smaller type. Check its uses. */
1473 use_stmt = vect_single_imm_use (stmt);
1474 if (!use_stmt || !is_gimple_assign (use_stmt))
1475 return NULL;
1476
1477 /* Create pattern statement for STMT. */
1478 vectype = get_vectype_for_scalar_type (new_type);
1479 if (!vectype)
1480 return NULL;
1481
1482 /* We want to collect all the statements for which we create pattern
1483 statetments, except for the case when the last statement in the
1484 sequence doesn't have a corresponding pattern statement. In such
1485 case we associate the last pattern statement with the last statement
1486 in the sequence. Therefore, we only add the original statement to
1487 the list if we know that it is not the last. */
1488 if (prev_stmt)
1489 stmts->safe_push (prev_stmt);
1490
1491 var = vect_recog_temp_ssa_var (new_type, NULL);
1492 pattern_stmt
1493 = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1494 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1495 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1496
1497 if (dump_enabled_p ())
1498 {
1499 dump_printf_loc (MSG_NOTE, vect_location,
1500 "created pattern stmt: ");
1501 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1502 dump_printf (MSG_NOTE, "\n");
1503 }
1504
1505 type = gimple_expr_type (stmt);
1506 prev_stmt = stmt;
1507 stmt = use_stmt;
1508
1509 first = false;
1510 }
1511
1512 /* We got a sequence. We expect it to end with a type demotion operation.
1513 Otherwise, we quit (for now). There are three possible cases: the
1514 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1515 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1516 NEW_TYPE differs (we create a new conversion statement). */
1517 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1518 {
1519 use_lhs = gimple_assign_lhs (use_stmt);
1520 use_type = TREE_TYPE (use_lhs);
1521 /* Support only type demotion or signedess change. */
1522 if (!INTEGRAL_TYPE_P (use_type)
1523 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1524 return NULL;
1525
1526 /* Check that NEW_TYPE is not bigger than the conversion result. */
1527 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1528 return NULL;
1529
1530 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1531 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1532 {
1533 /* Create NEW_TYPE->USE_TYPE conversion. */
1534 new_oprnd = make_ssa_name (use_type);
1535 pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1536 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1537
1538 *type_in = get_vectype_for_scalar_type (new_type);
1539 *type_out = get_vectype_for_scalar_type (use_type);
1540
1541 /* We created a pattern statement for the last statement in the
1542 sequence, so we don't need to associate it with the pattern
1543 statement created for PREV_STMT. Therefore, we add PREV_STMT
1544 to the list in order to mark it later in vect_pattern_recog_1. */
1545 if (prev_stmt)
1546 stmts->safe_push (prev_stmt);
1547 }
1548 else
1549 {
1550 if (prev_stmt)
1551 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1552 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1553
1554 *type_in = vectype;
1555 *type_out = NULL_TREE;
1556 }
1557
1558 stmts->safe_push (use_stmt);
1559 }
1560 else
1561 /* TODO: support general case, create a conversion to the correct type. */
1562 return NULL;
1563
1564 /* Pattern detected. */
1565 if (dump_enabled_p ())
1566 {
1567 dump_printf_loc (MSG_NOTE, vect_location,
1568 "vect_recog_over_widening_pattern: detected: ");
1569 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1570 dump_printf (MSG_NOTE, "\n");
1571 }
1572
1573 return pattern_stmt;
1574 }
1575
1576 /* Detect widening shift pattern:
1577
1578 type a_t;
1579 TYPE a_T, res_T;
1580
1581 S1 a_t = ;
1582 S2 a_T = (TYPE) a_t;
1583 S3 res_T = a_T << CONST;
1584
1585 where type 'TYPE' is at least double the size of type 'type'.
1586
1587 Also detect cases where the shift result is immediately converted
1588 to another type 'result_type' that is no larger in size than 'TYPE'.
1589 In those cases we perform a widen-shift that directly results in
1590 'result_type', to avoid a possible over-widening situation:
1591
1592 type a_t;
1593 TYPE a_T, res_T;
1594 result_type res_result;
1595
1596 S1 a_t = ;
1597 S2 a_T = (TYPE) a_t;
1598 S3 res_T = a_T << CONST;
1599 S4 res_result = (result_type) res_T;
1600 '--> res_result' = a_t w<< CONST;
1601
1602 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1603 create an additional pattern stmt for S2 to create a variable of an
1604 intermediate type, and perform widen-shift on the intermediate type:
1605
1606 type a_t;
1607 interm_type a_it;
1608 TYPE a_T, res_T, res_T';
1609
1610 S1 a_t = ;
1611 S2 a_T = (TYPE) a_t;
1612 '--> a_it = (interm_type) a_t;
1613 S3 res_T = a_T << CONST;
1614 '--> res_T' = a_it <<* CONST;
1615
1616 Input/Output:
1617
1618 * STMTS: Contains a stmt from which the pattern search begins.
1619 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1620 in STMTS. When an intermediate type is used and a pattern statement is
1621 created for S2, we also put S2 here (before S3).
1622
1623 Output:
1624
1625 * TYPE_IN: The type of the input arguments to the pattern.
1626
1627 * TYPE_OUT: The type of the output of this pattern.
1628
1629 * Return value: A new stmt that will be used to replace the sequence of
1630 stmts that constitute the pattern. In this case it will be:
1631 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1632
1633 static gimple *
1634 vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
1635 tree *type_in, tree *type_out)
1636 {
1637 gimple *last_stmt = stmts->pop ();
1638 gimple *def_stmt0;
1639 tree oprnd0, oprnd1;
1640 tree type, half_type0;
1641 gimple *pattern_stmt;
1642 tree vectype, vectype_out = NULL_TREE;
1643 tree var;
1644 enum tree_code dummy_code;
1645 int dummy_int;
1646 vec<tree> dummy_vec;
1647 gimple *use_stmt;
1648 bool promotion;
1649
1650 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1651 return NULL;
1652
1653 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1654 return NULL;
1655
1656 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1657 return NULL;
1658
1659 oprnd0 = gimple_assign_rhs1 (last_stmt);
1660 oprnd1 = gimple_assign_rhs2 (last_stmt);
1661 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1662 return NULL;
1663
1664 /* Check operand 0: it has to be defined by a type promotion. */
1665 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1666 &promotion)
1667 || !promotion)
1668 return NULL;
1669
1670 /* Check operand 1: has to be positive. We check that it fits the type
1671 in vect_handle_widen_op_by_const (). */
1672 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1673 return NULL;
1674
1675 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1676 type = gimple_expr_type (last_stmt);
1677
1678 /* Check for subsequent conversion to another type. */
1679 use_stmt = vect_single_imm_use (last_stmt);
1680 if (use_stmt && is_gimple_assign (use_stmt)
1681 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1682 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1683 {
1684 tree use_lhs = gimple_assign_lhs (use_stmt);
1685 tree use_type = TREE_TYPE (use_lhs);
1686
1687 if (INTEGRAL_TYPE_P (use_type)
1688 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1689 {
1690 last_stmt = use_stmt;
1691 type = use_type;
1692 }
1693 }
1694
1695 /* Check if this a widening operation. */
1696 gimple *wstmt = NULL;
1697 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1698 &oprnd0, &wstmt,
1699 type, &half_type0, def_stmt0))
1700 return NULL;
1701
1702 /* Pattern detected. */
1703 if (dump_enabled_p ())
1704 dump_printf_loc (MSG_NOTE, vect_location,
1705 "vect_recog_widen_shift_pattern: detected:\n");
1706
1707 /* Check target support. */
1708 vectype = get_vectype_for_scalar_type (half_type0);
1709 vectype_out = get_vectype_for_scalar_type (type);
1710
1711 if (!vectype
1712 || !vectype_out
1713 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1714 vectype_out, vectype,
1715 &dummy_code, &dummy_code,
1716 &dummy_int, &dummy_vec))
1717 return NULL;
1718
1719 *type_in = vectype;
1720 *type_out = vectype_out;
1721
1722 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1723 var = vect_recog_temp_ssa_var (type, NULL);
1724 pattern_stmt =
1725 gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
1726 if (wstmt)
1727 {
1728 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1729 new_pattern_def_seq (stmt_vinfo, wstmt);
1730 stmt_vec_info new_stmt_info
1731 = new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
1732 set_vinfo_for_stmt (wstmt, new_stmt_info);
1733 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1734 }
1735
1736 if (dump_enabled_p ())
1737 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1738
1739 stmts->safe_push (last_stmt);
1740 return pattern_stmt;
1741 }
1742
1743 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1744
1745 type a_t, b_t, c_t;
1746
1747 S0 a_t = b_t r<< c_t;
1748
1749 Input/Output:
1750
1751 * STMTS: Contains a stmt from which the pattern search begins,
1752 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1753 with a sequence:
1754
1755 S1 d_t = -c_t;
1756 S2 e_t = d_t & (B - 1);
1757 S3 f_t = b_t << c_t;
1758 S4 g_t = b_t >> e_t;
1759 S0 a_t = f_t | g_t;
1760
1761 where B is element bitsize of type.
1762
1763 Output:
1764
1765 * TYPE_IN: The type of the input arguments to the pattern.
1766
1767 * TYPE_OUT: The type of the output of this pattern.
1768
1769 * Return value: A new stmt that will be used to replace the rotate
1770 S0 stmt. */
1771
1772 static gimple *
1773 vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
1774 {
1775 gimple *last_stmt = stmts->pop ();
1776 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1777 gimple *pattern_stmt, *def_stmt;
1778 enum tree_code rhs_code;
1779 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1780 vec_info *vinfo = stmt_vinfo->vinfo;
1781 enum vect_def_type dt;
1782 optab optab1, optab2;
1783 edge ext_def = NULL;
1784
1785 if (!is_gimple_assign (last_stmt))
1786 return NULL;
1787
1788 rhs_code = gimple_assign_rhs_code (last_stmt);
1789 switch (rhs_code)
1790 {
1791 case LROTATE_EXPR:
1792 case RROTATE_EXPR:
1793 break;
1794 default:
1795 return NULL;
1796 }
1797
1798 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1799 return NULL;
1800
1801 lhs = gimple_assign_lhs (last_stmt);
1802 oprnd0 = gimple_assign_rhs1 (last_stmt);
1803 type = TREE_TYPE (oprnd0);
1804 oprnd1 = gimple_assign_rhs2 (last_stmt);
1805 if (TREE_CODE (oprnd0) != SSA_NAME
1806 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1807 || !INTEGRAL_TYPE_P (type)
1808 || !TYPE_UNSIGNED (type))
1809 return NULL;
1810
1811 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
1812 return NULL;
1813
1814 if (dt != vect_internal_def
1815 && dt != vect_constant_def
1816 && dt != vect_external_def)
1817 return NULL;
1818
1819 vectype = get_vectype_for_scalar_type (type);
1820 if (vectype == NULL_TREE)
1821 return NULL;
1822
1823 /* If vector/vector or vector/scalar rotate is supported by the target,
1824 don't do anything here. */
1825 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1826 if (optab1
1827 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1828 return NULL;
1829
1830 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
1831 {
1832 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1833 if (optab2
1834 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1835 return NULL;
1836 }
1837
1838 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1839 don't do anything here either. */
1840 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1841 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1842 if (!optab1
1843 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1844 || !optab2
1845 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1846 {
1847 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
1848 return NULL;
1849 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1850 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1851 if (!optab1
1852 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1853 || !optab2
1854 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1855 return NULL;
1856 }
1857
1858 *type_in = vectype;
1859 *type_out = vectype;
1860 if (*type_in == NULL_TREE)
1861 return NULL;
1862
1863 if (dt == vect_external_def
1864 && TREE_CODE (oprnd1) == SSA_NAME
1865 && is_a <loop_vec_info> (vinfo))
1866 {
1867 struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
1868 ext_def = loop_preheader_edge (loop);
1869 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1870 {
1871 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1872 if (bb == NULL
1873 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1874 ext_def = NULL;
1875 }
1876 }
1877
1878 def = NULL_TREE;
1879 if (TREE_CODE (oprnd1) == INTEGER_CST
1880 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1881 def = oprnd1;
1882 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1883 {
1884 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1885 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1886 && TYPE_PRECISION (TREE_TYPE (rhs1))
1887 == TYPE_PRECISION (type))
1888 def = rhs1;
1889 }
1890
1891 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1892 if (def == NULL_TREE)
1893 {
1894 def = vect_recog_temp_ssa_var (type, NULL);
1895 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
1896 if (ext_def)
1897 {
1898 basic_block new_bb
1899 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1900 gcc_assert (!new_bb);
1901 }
1902 else
1903 append_pattern_def_seq (stmt_vinfo, def_stmt);
1904 }
1905 stype = TREE_TYPE (def);
1906
1907 if (TREE_CODE (def) == INTEGER_CST)
1908 {
1909 if (!tree_fits_uhwi_p (def)
1910 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
1911 || integer_zerop (def))
1912 return NULL;
1913 def2 = build_int_cst (stype,
1914 GET_MODE_PRECISION (TYPE_MODE (type))
1915 - tree_to_uhwi (def));
1916 }
1917 else
1918 {
1919 tree vecstype = get_vectype_for_scalar_type (stype);
1920 stmt_vec_info def_stmt_vinfo;
1921
1922 if (vecstype == NULL_TREE)
1923 return NULL;
1924 def2 = vect_recog_temp_ssa_var (stype, NULL);
1925 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
1926 if (ext_def)
1927 {
1928 basic_block new_bb
1929 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1930 gcc_assert (!new_bb);
1931 }
1932 else
1933 {
1934 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1935 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1936 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1937 append_pattern_def_seq (stmt_vinfo, def_stmt);
1938 }
1939
1940 def2 = vect_recog_temp_ssa_var (stype, NULL);
1941 tree mask
1942 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
1943 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
1944 gimple_assign_lhs (def_stmt), mask);
1945 if (ext_def)
1946 {
1947 basic_block new_bb
1948 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1949 gcc_assert (!new_bb);
1950 }
1951 else
1952 {
1953 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1954 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1955 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1956 append_pattern_def_seq (stmt_vinfo, def_stmt);
1957 }
1958 }
1959
1960 var1 = vect_recog_temp_ssa_var (type, NULL);
1961 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
1962 ? LSHIFT_EXPR : RSHIFT_EXPR,
1963 oprnd0, def);
1964 append_pattern_def_seq (stmt_vinfo, def_stmt);
1965
1966 var2 = vect_recog_temp_ssa_var (type, NULL);
1967 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
1968 ? RSHIFT_EXPR : LSHIFT_EXPR,
1969 oprnd0, def2);
1970 append_pattern_def_seq (stmt_vinfo, def_stmt);
1971
1972 /* Pattern detected. */
1973 if (dump_enabled_p ())
1974 dump_printf_loc (MSG_NOTE, vect_location,
1975 "vect_recog_rotate_pattern: detected:\n");
1976
1977 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1978 var = vect_recog_temp_ssa_var (type, NULL);
1979 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
1980
1981 if (dump_enabled_p ())
1982 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1983
1984 stmts->safe_push (last_stmt);
1985 return pattern_stmt;
1986 }
1987
1988 /* Detect a vector by vector shift pattern that wouldn't be otherwise
1989 vectorized:
1990
1991 type a_t;
1992 TYPE b_T, res_T;
1993
1994 S1 a_t = ;
1995 S2 b_T = ;
1996 S3 res_T = b_T op a_t;
1997
1998 where type 'TYPE' is a type with different size than 'type',
1999 and op is <<, >> or rotate.
2000
2001 Also detect cases:
2002
2003 type a_t;
2004 TYPE b_T, c_T, res_T;
2005
2006 S0 c_T = ;
2007 S1 a_t = (type) c_T;
2008 S2 b_T = ;
2009 S3 res_T = b_T op a_t;
2010
2011 Input/Output:
2012
2013 * STMTS: Contains a stmt from which the pattern search begins,
2014 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2015 with a shift/rotate which has same type on both operands, in the
2016 second case just b_T op c_T, in the first case with added cast
2017 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2018
2019 Output:
2020
2021 * TYPE_IN: The type of the input arguments to the pattern.
2022
2023 * TYPE_OUT: The type of the output of this pattern.
2024
2025 * Return value: A new stmt that will be used to replace the shift/rotate
2026 S3 stmt. */
2027
2028 static gimple *
2029 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
2030 tree *type_in, tree *type_out)
2031 {
2032 gimple *last_stmt = stmts->pop ();
2033 tree oprnd0, oprnd1, lhs, var;
2034 gimple *pattern_stmt, *def_stmt;
2035 enum tree_code rhs_code;
2036 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2037 vec_info *vinfo = stmt_vinfo->vinfo;
2038 enum vect_def_type dt;
2039
2040 if (!is_gimple_assign (last_stmt))
2041 return NULL;
2042
2043 rhs_code = gimple_assign_rhs_code (last_stmt);
2044 switch (rhs_code)
2045 {
2046 case LSHIFT_EXPR:
2047 case RSHIFT_EXPR:
2048 case LROTATE_EXPR:
2049 case RROTATE_EXPR:
2050 break;
2051 default:
2052 return NULL;
2053 }
2054
2055 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2056 return NULL;
2057
2058 lhs = gimple_assign_lhs (last_stmt);
2059 oprnd0 = gimple_assign_rhs1 (last_stmt);
2060 oprnd1 = gimple_assign_rhs2 (last_stmt);
2061 if (TREE_CODE (oprnd0) != SSA_NAME
2062 || TREE_CODE (oprnd1) != SSA_NAME
2063 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2064 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2065 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2066 || TYPE_PRECISION (TREE_TYPE (lhs))
2067 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2068 return NULL;
2069
2070 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
2071 return NULL;
2072
2073 if (dt != vect_internal_def)
2074 return NULL;
2075
2076 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2077 *type_out = *type_in;
2078 if (*type_in == NULL_TREE)
2079 return NULL;
2080
2081 tree def = NULL_TREE;
2082 if (gimple_assign_cast_p (def_stmt))
2083 {
2084 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2085 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2086 && TYPE_PRECISION (TREE_TYPE (rhs1))
2087 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2088 def = rhs1;
2089 }
2090
2091 if (def == NULL_TREE)
2092 {
2093 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2094 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2095 new_pattern_def_seq (stmt_vinfo, def_stmt);
2096 }
2097
2098 /* Pattern detected. */
2099 if (dump_enabled_p ())
2100 dump_printf_loc (MSG_NOTE, vect_location,
2101 "vect_recog_vector_vector_shift_pattern: detected:\n");
2102
2103 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2104 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2105 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2106
2107 if (dump_enabled_p ())
2108 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2109
2110 stmts->safe_push (last_stmt);
2111 return pattern_stmt;
2112 }
2113
2114 /* Detect multiplication by constant which are postive or negatives of power 2,
2115 and convert them to shift patterns.
2116
2117 Mult with constants that are postive power of two.
2118 type a_t;
2119 type b_t
2120 S1: b_t = a_t * n
2121
2122 or
2123
2124 Mult with constants that are negative power of two.
2125 S2: b_t = a_t * -n
2126
2127 Input/Output:
2128
2129 STMTS: Contains a stmt from which the pattern search begins,
2130 i.e. the mult stmt. Convert the mult operation to LSHIFT if
2131 constant operand is a power of 2.
2132 type a_t, b_t
2133 S1': b_t = a_t << log2 (n)
2134
2135 Convert the mult operation to LSHIFT and followed by a NEGATE
2136 if constant operand is a negative power of 2.
2137 type a_t, b_t, res_T;
2138 S2': b_t = a_t << log2 (n)
2139 S3': res_T = - (b_t)
2140
2141 Output:
2142
2143 * TYPE_IN: The type of the input arguments to the pattern.
2144
2145 * TYPE_OUT: The type of the output of this pattern.
2146
2147 * Return value: A new stmt that will be used to replace the multiplication
2148 S1 or S2 stmt. */
2149
2150 static gimple *
2151 vect_recog_mult_pattern (vec<gimple *> *stmts,
2152 tree *type_in, tree *type_out)
2153 {
2154 gimple *last_stmt = stmts->pop ();
2155 tree oprnd0, oprnd1, vectype, itype;
2156 gimple *pattern_stmt, *def_stmt;
2157 optab optab;
2158 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2159 int power2_val, power2_neg_val;
2160 tree shift;
2161
2162 if (!is_gimple_assign (last_stmt))
2163 return NULL;
2164
2165 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2166 return NULL;
2167
2168 oprnd0 = gimple_assign_rhs1 (last_stmt);
2169 oprnd1 = gimple_assign_rhs2 (last_stmt);
2170 itype = TREE_TYPE (oprnd0);
2171
2172 if (TREE_CODE (oprnd0) != SSA_NAME
2173 || TREE_CODE (oprnd1) != INTEGER_CST
2174 || !INTEGRAL_TYPE_P (itype)
2175 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2176 return NULL;
2177
2178 vectype = get_vectype_for_scalar_type (itype);
2179 if (vectype == NULL_TREE)
2180 return NULL;
2181
2182 /* If the target can handle vectorized multiplication natively,
2183 don't attempt to optimize this. */
2184 optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2185 if (optab != unknown_optab)
2186 {
2187 machine_mode vec_mode = TYPE_MODE (vectype);
2188 int icode = (int) optab_handler (optab, vec_mode);
2189 if (icode != CODE_FOR_nothing)
2190 return NULL;
2191 }
2192
2193 /* If target cannot handle vector left shift then we cannot
2194 optimize and bail out. */
2195 optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2196 if (!optab
2197 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2198 return NULL;
2199
2200 power2_val = wi::exact_log2 (oprnd1);
2201 power2_neg_val = wi::exact_log2 (wi::neg (oprnd1));
2202
2203 /* Handle constant operands that are postive or negative powers of 2. */
2204 if (power2_val != -1)
2205 {
2206 shift = build_int_cst (itype, power2_val);
2207 pattern_stmt
2208 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2209 LSHIFT_EXPR, oprnd0, shift);
2210 }
2211 else if (power2_neg_val != -1)
2212 {
2213 /* If the target cannot handle vector NEGATE then we cannot
2214 do the optimization. */
2215 optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector);
2216 if (!optab
2217 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2218 return NULL;
2219
2220 shift = build_int_cst (itype, power2_neg_val);
2221 def_stmt
2222 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2223 LSHIFT_EXPR, oprnd0, shift);
2224 new_pattern_def_seq (stmt_vinfo, def_stmt);
2225 pattern_stmt
2226 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2227 NEGATE_EXPR, gimple_assign_lhs (def_stmt));
2228 }
2229 else
2230 return NULL;
2231
2232 /* Pattern detected. */
2233 if (dump_enabled_p ())
2234 dump_printf_loc (MSG_NOTE, vect_location,
2235 "vect_recog_mult_pattern: detected:\n");
2236
2237 if (dump_enabled_p ())
2238 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2239 pattern_stmt,0);
2240
2241 stmts->safe_push (last_stmt);
2242 *type_in = vectype;
2243 *type_out = vectype;
2244
2245 return pattern_stmt;
2246 }
2247
2248 /* Detect a signed division by a constant that wouldn't be
2249 otherwise vectorized:
2250
2251 type a_t, b_t;
2252
2253 S1 a_t = b_t / N;
2254
2255 where type 'type' is an integral type and N is a constant.
2256
2257 Similarly handle modulo by a constant:
2258
2259 S4 a_t = b_t % N;
2260
2261 Input/Output:
2262
2263 * STMTS: Contains a stmt from which the pattern search begins,
2264 i.e. the division stmt. S1 is replaced by if N is a power
2265 of two constant and type is signed:
2266 S3 y_t = b_t < 0 ? N - 1 : 0;
2267 S2 x_t = b_t + y_t;
2268 S1' a_t = x_t >> log2 (N);
2269
2270 S4 is replaced if N is a power of two constant and
2271 type is signed by (where *_T temporaries have unsigned type):
2272 S9 y_T = b_t < 0 ? -1U : 0U;
2273 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2274 S7 z_t = (type) z_T;
2275 S6 w_t = b_t + z_t;
2276 S5 x_t = w_t & (N - 1);
2277 S4' a_t = x_t - z_t;
2278
2279 Output:
2280
2281 * TYPE_IN: The type of the input arguments to the pattern.
2282
2283 * TYPE_OUT: The type of the output of this pattern.
2284
2285 * Return value: A new stmt that will be used to replace the division
2286 S1 or modulo S4 stmt. */
2287
2288 static gimple *
2289 vect_recog_divmod_pattern (vec<gimple *> *stmts,
2290 tree *type_in, tree *type_out)
2291 {
2292 gimple *last_stmt = stmts->pop ();
2293 tree oprnd0, oprnd1, vectype, itype, cond;
2294 gimple *pattern_stmt, *def_stmt;
2295 enum tree_code rhs_code;
2296 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2297 vec_info *vinfo = stmt_vinfo->vinfo;
2298 optab optab;
2299 tree q;
2300 int dummy_int, prec;
2301 stmt_vec_info def_stmt_vinfo;
2302
2303 if (!is_gimple_assign (last_stmt))
2304 return NULL;
2305
2306 rhs_code = gimple_assign_rhs_code (last_stmt);
2307 switch (rhs_code)
2308 {
2309 case TRUNC_DIV_EXPR:
2310 case TRUNC_MOD_EXPR:
2311 break;
2312 default:
2313 return NULL;
2314 }
2315
2316 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2317 return NULL;
2318
2319 oprnd0 = gimple_assign_rhs1 (last_stmt);
2320 oprnd1 = gimple_assign_rhs2 (last_stmt);
2321 itype = TREE_TYPE (oprnd0);
2322 if (TREE_CODE (oprnd0) != SSA_NAME
2323 || TREE_CODE (oprnd1) != INTEGER_CST
2324 || TREE_CODE (itype) != INTEGER_TYPE
2325 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2326 return NULL;
2327
2328 vectype = get_vectype_for_scalar_type (itype);
2329 if (vectype == NULL_TREE)
2330 return NULL;
2331
2332 /* If the target can handle vectorized division or modulo natively,
2333 don't attempt to optimize this. */
2334 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2335 if (optab != unknown_optab)
2336 {
2337 machine_mode vec_mode = TYPE_MODE (vectype);
2338 int icode = (int) optab_handler (optab, vec_mode);
2339 if (icode != CODE_FOR_nothing)
2340 return NULL;
2341 }
2342
2343 prec = TYPE_PRECISION (itype);
2344 if (integer_pow2p (oprnd1))
2345 {
2346 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2347 return NULL;
2348
2349 /* Pattern detected. */
2350 if (dump_enabled_p ())
2351 dump_printf_loc (MSG_NOTE, vect_location,
2352 "vect_recog_divmod_pattern: detected:\n");
2353
2354 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2355 build_int_cst (itype, 0));
2356 if (rhs_code == TRUNC_DIV_EXPR)
2357 {
2358 tree var = vect_recog_temp_ssa_var (itype, NULL);
2359 tree shift;
2360 def_stmt
2361 = gimple_build_assign (var, COND_EXPR, cond,
2362 fold_build2 (MINUS_EXPR, itype, oprnd1,
2363 build_int_cst (itype, 1)),
2364 build_int_cst (itype, 0));
2365 new_pattern_def_seq (stmt_vinfo, def_stmt);
2366 var = vect_recog_temp_ssa_var (itype, NULL);
2367 def_stmt
2368 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2369 gimple_assign_lhs (def_stmt));
2370 append_pattern_def_seq (stmt_vinfo, def_stmt);
2371
2372 shift = build_int_cst (itype, tree_log2 (oprnd1));
2373 pattern_stmt
2374 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2375 RSHIFT_EXPR, var, shift);
2376 }
2377 else
2378 {
2379 tree signmask;
2380 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2381 if (compare_tree_int (oprnd1, 2) == 0)
2382 {
2383 signmask = vect_recog_temp_ssa_var (itype, NULL);
2384 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2385 build_int_cst (itype, 1),
2386 build_int_cst (itype, 0));
2387 append_pattern_def_seq (stmt_vinfo, def_stmt);
2388 }
2389 else
2390 {
2391 tree utype
2392 = build_nonstandard_integer_type (prec, 1);
2393 tree vecutype = get_vectype_for_scalar_type (utype);
2394 tree shift
2395 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2396 - tree_log2 (oprnd1));
2397 tree var = vect_recog_temp_ssa_var (utype, NULL);
2398
2399 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2400 build_int_cst (utype, -1),
2401 build_int_cst (utype, 0));
2402 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2403 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2404 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2405 append_pattern_def_seq (stmt_vinfo, def_stmt);
2406 var = vect_recog_temp_ssa_var (utype, NULL);
2407 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2408 gimple_assign_lhs (def_stmt),
2409 shift);
2410 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2411 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2412 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2413 append_pattern_def_seq (stmt_vinfo, def_stmt);
2414 signmask = vect_recog_temp_ssa_var (itype, NULL);
2415 def_stmt
2416 = gimple_build_assign (signmask, NOP_EXPR, var);
2417 append_pattern_def_seq (stmt_vinfo, def_stmt);
2418 }
2419 def_stmt
2420 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2421 PLUS_EXPR, oprnd0, signmask);
2422 append_pattern_def_seq (stmt_vinfo, def_stmt);
2423 def_stmt
2424 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2425 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2426 fold_build2 (MINUS_EXPR, itype, oprnd1,
2427 build_int_cst (itype, 1)));
2428 append_pattern_def_seq (stmt_vinfo, def_stmt);
2429
2430 pattern_stmt
2431 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2432 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2433 signmask);
2434 }
2435
2436 if (dump_enabled_p ())
2437 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2438 0);
2439
2440 stmts->safe_push (last_stmt);
2441
2442 *type_in = vectype;
2443 *type_out = vectype;
2444 return pattern_stmt;
2445 }
2446
2447 if (prec > HOST_BITS_PER_WIDE_INT
2448 || integer_zerop (oprnd1))
2449 return NULL;
2450
2451 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2452 return NULL;
2453
2454 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2455
2456 if (TYPE_UNSIGNED (itype))
2457 {
2458 unsigned HOST_WIDE_INT mh, ml;
2459 int pre_shift, post_shift;
2460 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2461 & GET_MODE_MASK (TYPE_MODE (itype)));
2462 tree t1, t2, t3, t4;
2463
2464 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2465 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2466 return NULL;
2467
2468 /* Find a suitable multiplier and right shift count
2469 instead of multiplying with D. */
2470 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2471
2472 /* If the suggested multiplier is more than SIZE bits, we can do better
2473 for even divisors, using an initial right shift. */
2474 if (mh != 0 && (d & 1) == 0)
2475 {
2476 pre_shift = floor_log2 (d & -d);
2477 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2478 &ml, &post_shift, &dummy_int);
2479 gcc_assert (!mh);
2480 }
2481 else
2482 pre_shift = 0;
2483
2484 if (mh != 0)
2485 {
2486 if (post_shift - 1 >= prec)
2487 return NULL;
2488
2489 /* t1 = oprnd0 h* ml;
2490 t2 = oprnd0 - t1;
2491 t3 = t2 >> 1;
2492 t4 = t1 + t3;
2493 q = t4 >> (post_shift - 1); */
2494 t1 = vect_recog_temp_ssa_var (itype, NULL);
2495 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2496 build_int_cst (itype, ml));
2497 append_pattern_def_seq (stmt_vinfo, def_stmt);
2498
2499 t2 = vect_recog_temp_ssa_var (itype, NULL);
2500 def_stmt
2501 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2502 append_pattern_def_seq (stmt_vinfo, def_stmt);
2503
2504 t3 = vect_recog_temp_ssa_var (itype, NULL);
2505 def_stmt
2506 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2507 append_pattern_def_seq (stmt_vinfo, def_stmt);
2508
2509 t4 = vect_recog_temp_ssa_var (itype, NULL);
2510 def_stmt
2511 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2512
2513 if (post_shift != 1)
2514 {
2515 append_pattern_def_seq (stmt_vinfo, def_stmt);
2516
2517 q = vect_recog_temp_ssa_var (itype, NULL);
2518 pattern_stmt
2519 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2520 build_int_cst (itype, post_shift - 1));
2521 }
2522 else
2523 {
2524 q = t4;
2525 pattern_stmt = def_stmt;
2526 }
2527 }
2528 else
2529 {
2530 if (pre_shift >= prec || post_shift >= prec)
2531 return NULL;
2532
2533 /* t1 = oprnd0 >> pre_shift;
2534 t2 = t1 h* ml;
2535 q = t2 >> post_shift; */
2536 if (pre_shift)
2537 {
2538 t1 = vect_recog_temp_ssa_var (itype, NULL);
2539 def_stmt
2540 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2541 build_int_cst (NULL, pre_shift));
2542 append_pattern_def_seq (stmt_vinfo, def_stmt);
2543 }
2544 else
2545 t1 = oprnd0;
2546
2547 t2 = vect_recog_temp_ssa_var (itype, NULL);
2548 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2549 build_int_cst (itype, ml));
2550
2551 if (post_shift)
2552 {
2553 append_pattern_def_seq (stmt_vinfo, def_stmt);
2554
2555 q = vect_recog_temp_ssa_var (itype, NULL);
2556 def_stmt
2557 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2558 build_int_cst (itype, post_shift));
2559 }
2560 else
2561 q = t2;
2562
2563 pattern_stmt = def_stmt;
2564 }
2565 }
2566 else
2567 {
2568 unsigned HOST_WIDE_INT ml;
2569 int post_shift;
2570 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2571 unsigned HOST_WIDE_INT abs_d;
2572 bool add = false;
2573 tree t1, t2, t3, t4;
2574
2575 /* Give up for -1. */
2576 if (d == -1)
2577 return NULL;
2578
2579 /* Since d might be INT_MIN, we have to cast to
2580 unsigned HOST_WIDE_INT before negating to avoid
2581 undefined signed overflow. */
2582 abs_d = (d >= 0
2583 ? (unsigned HOST_WIDE_INT) d
2584 : - (unsigned HOST_WIDE_INT) d);
2585
2586 /* n rem d = n rem -d */
2587 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2588 {
2589 d = abs_d;
2590 oprnd1 = build_int_cst (itype, abs_d);
2591 }
2592 else if (HOST_BITS_PER_WIDE_INT >= prec
2593 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2594 /* This case is not handled correctly below. */
2595 return NULL;
2596
2597 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2598 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2599 {
2600 add = true;
2601 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2602 }
2603 if (post_shift >= prec)
2604 return NULL;
2605
2606 /* t1 = oprnd0 h* ml; */
2607 t1 = vect_recog_temp_ssa_var (itype, NULL);
2608 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2609 build_int_cst (itype, ml));
2610
2611 if (add)
2612 {
2613 /* t2 = t1 + oprnd0; */
2614 append_pattern_def_seq (stmt_vinfo, def_stmt);
2615 t2 = vect_recog_temp_ssa_var (itype, NULL);
2616 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2617 }
2618 else
2619 t2 = t1;
2620
2621 if (post_shift)
2622 {
2623 /* t3 = t2 >> post_shift; */
2624 append_pattern_def_seq (stmt_vinfo, def_stmt);
2625 t3 = vect_recog_temp_ssa_var (itype, NULL);
2626 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2627 build_int_cst (itype, post_shift));
2628 }
2629 else
2630 t3 = t2;
2631
2632 wide_int oprnd0_min, oprnd0_max;
2633 int msb = 1;
2634 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2635 {
2636 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2637 msb = 0;
2638 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2639 msb = -1;
2640 }
2641
2642 if (msb == 0 && d >= 0)
2643 {
2644 /* q = t3; */
2645 q = t3;
2646 pattern_stmt = def_stmt;
2647 }
2648 else
2649 {
2650 /* t4 = oprnd0 >> (prec - 1);
2651 or if we know from VRP that oprnd0 >= 0
2652 t4 = 0;
2653 or if we know from VRP that oprnd0 < 0
2654 t4 = -1; */
2655 append_pattern_def_seq (stmt_vinfo, def_stmt);
2656 t4 = vect_recog_temp_ssa_var (itype, NULL);
2657 if (msb != 1)
2658 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2659 build_int_cst (itype, msb));
2660 else
2661 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2662 build_int_cst (itype, prec - 1));
2663 append_pattern_def_seq (stmt_vinfo, def_stmt);
2664
2665 /* q = t3 - t4; or q = t4 - t3; */
2666 q = vect_recog_temp_ssa_var (itype, NULL);
2667 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2668 d < 0 ? t3 : t4);
2669 }
2670 }
2671
2672 if (rhs_code == TRUNC_MOD_EXPR)
2673 {
2674 tree r, t1;
2675
2676 /* We divided. Now finish by:
2677 t1 = q * oprnd1;
2678 r = oprnd0 - t1; */
2679 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2680
2681 t1 = vect_recog_temp_ssa_var (itype, NULL);
2682 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
2683 append_pattern_def_seq (stmt_vinfo, def_stmt);
2684
2685 r = vect_recog_temp_ssa_var (itype, NULL);
2686 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
2687 }
2688
2689 /* Pattern detected. */
2690 if (dump_enabled_p ())
2691 {
2692 dump_printf_loc (MSG_NOTE, vect_location,
2693 "vect_recog_divmod_pattern: detected: ");
2694 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2695 dump_printf (MSG_NOTE, "\n");
2696 }
2697
2698 stmts->safe_push (last_stmt);
2699
2700 *type_in = vectype;
2701 *type_out = vectype;
2702 return pattern_stmt;
2703 }
2704
2705 /* Function vect_recog_mixed_size_cond_pattern
2706
2707 Try to find the following pattern:
2708
2709 type x_t, y_t;
2710 TYPE a_T, b_T, c_T;
2711 loop:
2712 S1 a_T = x_t CMP y_t ? b_T : c_T;
2713
2714 where type 'TYPE' is an integral type which has different size
2715 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2716 than 'type', the constants need to fit into an integer type
2717 with the same width as 'type') or results of conversion from 'type'.
2718
2719 Input:
2720
2721 * LAST_STMT: A stmt from which the pattern search begins.
2722
2723 Output:
2724
2725 * TYPE_IN: The type of the input arguments to the pattern.
2726
2727 * TYPE_OUT: The type of the output of this pattern.
2728
2729 * Return value: A new stmt that will be used to replace the pattern.
2730 Additionally a def_stmt is added.
2731
2732 a_it = x_t CMP y_t ? b_it : c_it;
2733 a_T = (TYPE) a_it; */
2734
2735 static gimple *
2736 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
2737 tree *type_out)
2738 {
2739 gimple *last_stmt = (*stmts)[0];
2740 tree cond_expr, then_clause, else_clause;
2741 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2742 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2743 gimple *pattern_stmt, *def_stmt;
2744 vec_info *vinfo = stmt_vinfo->vinfo;
2745 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2746 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
2747 bool promotion;
2748 tree comp_scalar_type;
2749
2750 if (!is_gimple_assign (last_stmt)
2751 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2752 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2753 return NULL;
2754
2755 cond_expr = gimple_assign_rhs1 (last_stmt);
2756 then_clause = gimple_assign_rhs2 (last_stmt);
2757 else_clause = gimple_assign_rhs3 (last_stmt);
2758
2759 if (!COMPARISON_CLASS_P (cond_expr))
2760 return NULL;
2761
2762 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2763 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2764 if (comp_vectype == NULL_TREE)
2765 return NULL;
2766
2767 type = gimple_expr_type (last_stmt);
2768 if (types_compatible_p (type, comp_scalar_type)
2769 || ((TREE_CODE (then_clause) != INTEGER_CST
2770 || TREE_CODE (else_clause) != INTEGER_CST)
2771 && !INTEGRAL_TYPE_P (comp_scalar_type))
2772 || !INTEGRAL_TYPE_P (type))
2773 return NULL;
2774
2775 if ((TREE_CODE (then_clause) != INTEGER_CST
2776 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2777 &def_stmt0, &promotion))
2778 || (TREE_CODE (else_clause) != INTEGER_CST
2779 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2780 &def_stmt1, &promotion)))
2781 return NULL;
2782
2783 if (orig_type0 && orig_type1
2784 && !types_compatible_p (orig_type0, orig_type1))
2785 return NULL;
2786
2787 if (orig_type0)
2788 {
2789 if (!types_compatible_p (orig_type0, comp_scalar_type))
2790 return NULL;
2791 then_clause = gimple_assign_rhs1 (def_stmt0);
2792 itype = orig_type0;
2793 }
2794
2795 if (orig_type1)
2796 {
2797 if (!types_compatible_p (orig_type1, comp_scalar_type))
2798 return NULL;
2799 else_clause = gimple_assign_rhs1 (def_stmt1);
2800 itype = orig_type1;
2801 }
2802
2803
2804 HOST_WIDE_INT cmp_mode_size
2805 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
2806
2807 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == cmp_mode_size)
2808 return NULL;
2809
2810 vectype = get_vectype_for_scalar_type (type);
2811 if (vectype == NULL_TREE)
2812 return NULL;
2813
2814 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2815 return NULL;
2816
2817 if (itype == NULL_TREE)
2818 itype = build_nonstandard_integer_type (cmp_mode_size,
2819 TYPE_UNSIGNED (type));
2820
2821 if (itype == NULL_TREE
2822 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != cmp_mode_size)
2823 return NULL;
2824
2825 vecitype = get_vectype_for_scalar_type (itype);
2826 if (vecitype == NULL_TREE)
2827 return NULL;
2828
2829 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2830 return NULL;
2831
2832 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > cmp_mode_size)
2833 {
2834 if ((TREE_CODE (then_clause) == INTEGER_CST
2835 && !int_fits_type_p (then_clause, itype))
2836 || (TREE_CODE (else_clause) == INTEGER_CST
2837 && !int_fits_type_p (else_clause, itype)))
2838 return NULL;
2839 }
2840
2841 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2842 COND_EXPR, unshare_expr (cond_expr),
2843 fold_convert (itype, then_clause),
2844 fold_convert (itype, else_clause));
2845 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2846 NOP_EXPR, gimple_assign_lhs (def_stmt));
2847
2848 new_pattern_def_seq (stmt_vinfo, def_stmt);
2849 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
2850 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2851 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2852 *type_in = vecitype;
2853 *type_out = vectype;
2854
2855 if (dump_enabled_p ())
2856 dump_printf_loc (MSG_NOTE, vect_location,
2857 "vect_recog_mixed_size_cond_pattern: detected:\n");
2858
2859 return pattern_stmt;
2860 }
2861
2862
2863 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2864 true if bool VAR can and should be optimized that way. Assume it shouldn't
2865 in case it's a result of a comparison which can be directly vectorized into
2866 a vector comparison. */
2867
2868 static bool
2869 check_bool_pattern (tree var, vec_info *vinfo)
2870 {
2871 gimple *def_stmt;
2872 enum vect_def_type dt;
2873 tree rhs1;
2874 enum tree_code rhs_code;
2875
2876 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
2877 return false;
2878
2879 if (dt != vect_internal_def)
2880 return false;
2881
2882 if (!is_gimple_assign (def_stmt))
2883 return false;
2884
2885 if (!has_single_use (var))
2886 return false;
2887
2888 rhs1 = gimple_assign_rhs1 (def_stmt);
2889 rhs_code = gimple_assign_rhs_code (def_stmt);
2890 switch (rhs_code)
2891 {
2892 case SSA_NAME:
2893 return check_bool_pattern (rhs1, vinfo);
2894
2895 CASE_CONVERT:
2896 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2897 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2898 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2899 return false;
2900 return check_bool_pattern (rhs1, vinfo);
2901
2902 case BIT_NOT_EXPR:
2903 return check_bool_pattern (rhs1, vinfo);
2904
2905 case BIT_AND_EXPR:
2906 case BIT_IOR_EXPR:
2907 case BIT_XOR_EXPR:
2908 if (!check_bool_pattern (rhs1, vinfo))
2909 return false;
2910 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo);
2911
2912 default:
2913 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2914 {
2915 tree vecitype, comp_vectype, mask_type;
2916
2917 /* If the comparison can throw, then is_gimple_condexpr will be
2918 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2919 if (stmt_could_throw_p (def_stmt))
2920 return false;
2921
2922 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2923 if (comp_vectype == NULL_TREE)
2924 return false;
2925
2926 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
2927 if (mask_type
2928 && expand_vec_cmp_expr_p (comp_vectype, mask_type))
2929 return false;
2930
2931 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2932 {
2933 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2934 tree itype
2935 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2936 vecitype = get_vectype_for_scalar_type (itype);
2937 if (vecitype == NULL_TREE)
2938 return false;
2939 }
2940 else
2941 vecitype = comp_vectype;
2942 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2943 }
2944 return false;
2945 }
2946 }
2947
2948
2949 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2950 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2951 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2952
2953 static tree
2954 adjust_bool_pattern_cast (tree type, tree var)
2955 {
2956 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2957 gimple *cast_stmt, *pattern_stmt;
2958
2959 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2960 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2961 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2962 cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2963 NOP_EXPR, gimple_assign_lhs (pattern_stmt));
2964 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2965 return gimple_assign_lhs (cast_stmt);
2966 }
2967
2968
2969 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2970 recursively. VAR is an SSA_NAME that should be transformed from bool
2971 to a wider integer type, OUT_TYPE is the desired final integer type of
2972 the whole pattern, TRUEVAL should be NULL unless optimizing
2973 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2974 in the then_clause, STMTS is where statements with added pattern stmts
2975 should be pushed to. */
2976
2977 static tree
2978 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2979 vec<gimple *> *stmts)
2980 {
2981 gimple *stmt = SSA_NAME_DEF_STMT (var);
2982 enum tree_code rhs_code, def_rhs_code;
2983 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2984 location_t loc;
2985 gimple *pattern_stmt, *def_stmt;
2986
2987 rhs1 = gimple_assign_rhs1 (stmt);
2988 rhs2 = gimple_assign_rhs2 (stmt);
2989 rhs_code = gimple_assign_rhs_code (stmt);
2990 loc = gimple_location (stmt);
2991 switch (rhs_code)
2992 {
2993 case SSA_NAME:
2994 CASE_CONVERT:
2995 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2996 itype = TREE_TYPE (irhs1);
2997 pattern_stmt
2998 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2999 SSA_NAME, irhs1);
3000 break;
3001
3002 case BIT_NOT_EXPR:
3003 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3004 itype = TREE_TYPE (irhs1);
3005 pattern_stmt
3006 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3007 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3008 break;
3009
3010 case BIT_AND_EXPR:
3011 /* Try to optimize x = y & (a < b ? 1 : 0); into
3012 x = (a < b ? y : 0);
3013
3014 E.g. for:
3015 bool a_b, b_b, c_b;
3016 TYPE d_T;
3017
3018 S1 a_b = x1 CMP1 y1;
3019 S2 b_b = x2 CMP2 y2;
3020 S3 c_b = a_b & b_b;
3021 S4 d_T = (TYPE) c_b;
3022
3023 we would normally emit:
3024
3025 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3026 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3027 S3' c_T = a_T & b_T;
3028 S4' d_T = c_T;
3029
3030 but we can save one stmt by using the
3031 result of one of the COND_EXPRs in the other COND_EXPR and leave
3032 BIT_AND_EXPR stmt out:
3033
3034 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3035 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3036 S4' f_T = c_T;
3037
3038 At least when VEC_COND_EXPR is implemented using masks
3039 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
3040 computes the comparison masks and ands it, in one case with
3041 all ones vector, in the other case with a vector register.
3042 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3043 often more expensive. */
3044 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3045 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3046 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3047 {
3048 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3049 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3050 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3051 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3052 {
3053 gimple *tstmt;
3054 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3055 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
3056 tstmt = stmts->pop ();
3057 gcc_assert (tstmt == def_stmt);
3058 stmts->quick_push (stmt);
3059 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3060 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3061 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3062 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3063 return irhs2;
3064 }
3065 else
3066 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3067 goto and_ior_xor;
3068 }
3069 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3070 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3071 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3072 {
3073 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3074 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3075 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3076 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3077 {
3078 gimple *tstmt;
3079 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3080 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
3081 tstmt = stmts->pop ();
3082 gcc_assert (tstmt == def_stmt);
3083 stmts->quick_push (stmt);
3084 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3085 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3086 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3087 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3088 return irhs1;
3089 }
3090 else
3091 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3092 goto and_ior_xor;
3093 }
3094 /* FALLTHRU */
3095 case BIT_IOR_EXPR:
3096 case BIT_XOR_EXPR:
3097 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3098 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3099 and_ior_xor:
3100 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3101 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3102 {
3103 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3104 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3105 int out_prec = TYPE_PRECISION (out_type);
3106 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3107 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3108 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3109 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3110 else
3111 {
3112 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3113 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3114 }
3115 }
3116 itype = TREE_TYPE (irhs1);
3117 pattern_stmt
3118 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3119 rhs_code, irhs1, irhs2);
3120 break;
3121
3122 default:
3123 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3124 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3125 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3126 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3127 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3128 {
3129 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3130 itype
3131 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3132 }
3133 else
3134 itype = TREE_TYPE (rhs1);
3135 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3136 if (trueval == NULL_TREE)
3137 trueval = build_int_cst (itype, 1);
3138 else
3139 gcc_checking_assert (useless_type_conversion_p (itype,
3140 TREE_TYPE (trueval)));
3141 pattern_stmt
3142 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3143 COND_EXPR, cond_expr, trueval,
3144 build_int_cst (itype, 0));
3145 break;
3146 }
3147
3148 stmts->safe_push (stmt);
3149 gimple_set_location (pattern_stmt, loc);
3150 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3151 return gimple_assign_lhs (pattern_stmt);
3152 }
3153
3154
3155 /* Return the proper type for converting bool VAR into
3156 an integer value or NULL_TREE if no such type exists.
3157 The type is chosen so that converted value has the
3158 same number of elements as VAR's vector type. */
3159
3160 static tree
3161 search_type_for_mask (tree var, vec_info *vinfo)
3162 {
3163 gimple *def_stmt;
3164 enum vect_def_type dt;
3165 tree rhs1;
3166 enum tree_code rhs_code;
3167 tree res = NULL_TREE, res2;
3168
3169 if (TREE_CODE (var) != SSA_NAME)
3170 return NULL_TREE;
3171
3172 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3173 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3174 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3175 return NULL_TREE;
3176
3177 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3178 return NULL_TREE;
3179
3180 if (dt != vect_internal_def)
3181 return NULL_TREE;
3182
3183 if (!is_gimple_assign (def_stmt))
3184 return NULL_TREE;
3185
3186 rhs_code = gimple_assign_rhs_code (def_stmt);
3187 rhs1 = gimple_assign_rhs1 (def_stmt);
3188
3189 switch (rhs_code)
3190 {
3191 case SSA_NAME:
3192 case BIT_NOT_EXPR:
3193 CASE_CONVERT:
3194 res = search_type_for_mask (rhs1, vinfo);
3195 break;
3196
3197 case BIT_AND_EXPR:
3198 case BIT_IOR_EXPR:
3199 case BIT_XOR_EXPR:
3200 res = search_type_for_mask (rhs1, vinfo);
3201 res2 = search_type_for_mask (gimple_assign_rhs2 (def_stmt), vinfo);
3202 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3203 res = res2;
3204 break;
3205
3206 default:
3207 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3208 {
3209 tree comp_vectype, mask_type;
3210
3211 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3212 if (comp_vectype == NULL_TREE)
3213 return NULL_TREE;
3214
3215 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3216 if (!mask_type
3217 || !expand_vec_cmp_expr_p (comp_vectype, mask_type))
3218 return NULL_TREE;
3219
3220 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3221 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3222 {
3223 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3224 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3225 }
3226 else
3227 res = TREE_TYPE (rhs1);
3228 }
3229 }
3230
3231 return res;
3232 }
3233
3234
3235 /* Function vect_recog_bool_pattern
3236
3237 Try to find pattern like following:
3238
3239 bool a_b, b_b, c_b, d_b, e_b;
3240 TYPE f_T;
3241 loop:
3242 S1 a_b = x1 CMP1 y1;
3243 S2 b_b = x2 CMP2 y2;
3244 S3 c_b = a_b & b_b;
3245 S4 d_b = x3 CMP3 y3;
3246 S5 e_b = c_b | d_b;
3247 S6 f_T = (TYPE) e_b;
3248
3249 where type 'TYPE' is an integral type. Or a similar pattern
3250 ending in
3251
3252 S6 f_Y = e_b ? r_Y : s_Y;
3253
3254 as results from if-conversion of a complex condition.
3255
3256 Input:
3257
3258 * LAST_STMT: A stmt at the end from which the pattern
3259 search begins, i.e. cast of a bool to
3260 an integer type.
3261
3262 Output:
3263
3264 * TYPE_IN: The type of the input arguments to the pattern.
3265
3266 * TYPE_OUT: The type of the output of this pattern.
3267
3268 * Return value: A new stmt that will be used to replace the pattern.
3269
3270 Assuming size of TYPE is the same as size of all comparisons
3271 (otherwise some casts would be added where needed), the above
3272 sequence we create related pattern stmts:
3273 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3274 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3275 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3276 S5' e_T = c_T | d_T;
3277 S6' f_T = e_T;
3278
3279 Instead of the above S3' we could emit:
3280 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3281 S3' c_T = a_T | b_T;
3282 but the above is more efficient. */
3283
3284 static gimple *
3285 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
3286 tree *type_out)
3287 {
3288 gimple *last_stmt = stmts->pop ();
3289 enum tree_code rhs_code;
3290 tree var, lhs, rhs, vectype;
3291 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3292 stmt_vec_info new_stmt_info;
3293 vec_info *vinfo = stmt_vinfo->vinfo;
3294 gimple *pattern_stmt;
3295
3296 if (!is_gimple_assign (last_stmt))
3297 return NULL;
3298
3299 var = gimple_assign_rhs1 (last_stmt);
3300 lhs = gimple_assign_lhs (last_stmt);
3301
3302 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3303 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3304 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3305 return NULL;
3306
3307 rhs_code = gimple_assign_rhs_code (last_stmt);
3308 if (CONVERT_EXPR_CODE_P (rhs_code))
3309 {
3310 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3311 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3312 return NULL;
3313 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3314 if (vectype == NULL_TREE)
3315 return NULL;
3316
3317 if (check_bool_pattern (var, vinfo))
3318 {
3319 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3320 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3321 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3322 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3323 else
3324 pattern_stmt
3325 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3326 }
3327 else
3328 {
3329 tree type = search_type_for_mask (var, vinfo);
3330 tree cst0, cst1, tmp;
3331
3332 if (!type)
3333 return NULL;
3334
3335 /* We may directly use cond with narrowed type to avoid
3336 multiple cond exprs with following result packing and
3337 perform single cond with packed mask instead. In case
3338 of widening we better make cond first and then extract
3339 results. */
3340 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3341 type = TREE_TYPE (lhs);
3342
3343 cst0 = build_int_cst (type, 0);
3344 cst1 = build_int_cst (type, 1);
3345 tmp = vect_recog_temp_ssa_var (type, NULL);
3346 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3347
3348 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3349 {
3350 tree new_vectype = get_vectype_for_scalar_type (type);
3351 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3352 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3353 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3354 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3355
3356 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3357 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3358 }
3359 }
3360
3361 *type_out = vectype;
3362 *type_in = vectype;
3363 stmts->safe_push (last_stmt);
3364 if (dump_enabled_p ())
3365 dump_printf_loc (MSG_NOTE, vect_location,
3366 "vect_recog_bool_pattern: detected:\n");
3367
3368 return pattern_stmt;
3369 }
3370 else if (rhs_code == COND_EXPR
3371 && TREE_CODE (var) == SSA_NAME)
3372 {
3373 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3374 if (vectype == NULL_TREE)
3375 return NULL;
3376
3377 /* Build a scalar type for the boolean result that when
3378 vectorized matches the vector type of the result in
3379 size and number of elements. */
3380 unsigned prec
3381 = wi::udiv_trunc (TYPE_SIZE (vectype),
3382 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3383 tree type
3384 = build_nonstandard_integer_type (prec,
3385 TYPE_UNSIGNED (TREE_TYPE (var)));
3386 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3387 return NULL;
3388
3389 if (!check_bool_pattern (var, vinfo))
3390 return NULL;
3391
3392 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
3393
3394 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3395 pattern_stmt
3396 = gimple_build_assign (lhs, COND_EXPR,
3397 build2 (NE_EXPR, boolean_type_node,
3398 rhs, build_int_cst (type, 0)),
3399 gimple_assign_rhs2 (last_stmt),
3400 gimple_assign_rhs3 (last_stmt));
3401 *type_out = vectype;
3402 *type_in = vectype;
3403 stmts->safe_push (last_stmt);
3404 if (dump_enabled_p ())
3405 dump_printf_loc (MSG_NOTE, vect_location,
3406 "vect_recog_bool_pattern: detected:\n");
3407
3408 return pattern_stmt;
3409 }
3410 else if (rhs_code == SSA_NAME
3411 && STMT_VINFO_DATA_REF (stmt_vinfo))
3412 {
3413 stmt_vec_info pattern_stmt_info;
3414 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3415 gcc_assert (vectype != NULL_TREE);
3416 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3417 return NULL;
3418
3419 if (check_bool_pattern (var, vinfo))
3420 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype),
3421 NULL_TREE, stmts);
3422 else
3423 {
3424 tree type = search_type_for_mask (var, vinfo);
3425 tree cst0, cst1, new_vectype;
3426
3427 if (!type)
3428 return NULL;
3429
3430 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3431 type = TREE_TYPE (vectype);
3432
3433 cst0 = build_int_cst (type, 0);
3434 cst1 = build_int_cst (type, 1);
3435 new_vectype = get_vectype_for_scalar_type (type);
3436
3437 rhs = vect_recog_temp_ssa_var (type, NULL);
3438 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3439
3440 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3441 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3442 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3443 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3444 }
3445
3446 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3447 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3448 {
3449 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3450 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3451 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3452 rhs = rhs2;
3453 }
3454 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3455 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3456 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3457 STMT_VINFO_DATA_REF (pattern_stmt_info)
3458 = STMT_VINFO_DATA_REF (stmt_vinfo);
3459 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3460 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3461 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3462 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3463 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3464 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3465 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3466 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3467 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3468 *type_out = vectype;
3469 *type_in = vectype;
3470 stmts->safe_push (last_stmt);
3471 if (dump_enabled_p ())
3472 dump_printf_loc (MSG_NOTE, vect_location,
3473 "vect_recog_bool_pattern: detected:\n");
3474 return pattern_stmt;
3475 }
3476 else
3477 return NULL;
3478 }
3479
3480
3481 /* A helper for vect_recog_mask_conversion_pattern. Build
3482 conversion of MASK to a type suitable for masking VECTYPE.
3483 Built statement gets required vectype and is appended to
3484 a pattern sequence of STMT_VINFO.
3485
3486 Return converted mask. */
3487
3488 static tree
3489 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3490 vec_info *vinfo)
3491 {
3492 gimple *stmt;
3493 tree masktype, tmp;
3494 stmt_vec_info new_stmt_info;
3495
3496 masktype = build_same_sized_truth_vector_type (vectype);
3497 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3498 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3499 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3500 set_vinfo_for_stmt (stmt, new_stmt_info);
3501 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3502 append_pattern_def_seq (stmt_vinfo, stmt);
3503
3504 return tmp;
3505 }
3506
3507
3508 /* Function vect_recog_mask_conversion_pattern
3509
3510 Try to find statements which require boolean type
3511 converison. Additional conversion statements are
3512 added to handle such cases. For example:
3513
3514 bool m_1, m_2, m_3;
3515 int i_4, i_5;
3516 double d_6, d_7;
3517 char c_1, c_2, c_3;
3518
3519 S1 m_1 = i_4 > i_5;
3520 S2 m_2 = d_6 < d_7;
3521 S3 m_3 = m_1 & m_2;
3522 S4 c_1 = m_3 ? c_2 : c_3;
3523
3524 Will be transformed into:
3525
3526 S1 m_1 = i_4 > i_5;
3527 S2 m_2 = d_6 < d_7;
3528 S3'' m_2' = (_Bool[bitsize=32])m_2
3529 S3' m_3' = m_1 & m_2';
3530 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3531 S4' c_1' = m_3'' ? c_2 : c_3; */
3532
3533 static gimple *
3534 vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
3535 tree *type_out)
3536 {
3537 gimple *last_stmt = stmts->pop ();
3538 enum tree_code rhs_code;
3539 tree lhs, rhs1, rhs2, tmp, rhs1_type, rhs2_type, vectype1, vectype2;
3540 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3541 stmt_vec_info pattern_stmt_info;
3542 vec_info *vinfo = stmt_vinfo->vinfo;
3543 gimple *pattern_stmt;
3544
3545 /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion. */
3546 if (is_gimple_call (last_stmt)
3547 && gimple_call_internal_p (last_stmt)
3548 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3549 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3550 {
3551 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3552
3553 if (load)
3554 {
3555 lhs = gimple_call_lhs (last_stmt);
3556 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3557 }
3558 else
3559 {
3560 rhs2 = gimple_call_arg (last_stmt, 3);
3561 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3562 }
3563
3564 rhs1 = gimple_call_arg (last_stmt, 2);
3565 rhs1_type = search_type_for_mask (rhs1, vinfo);
3566 if (!rhs1_type)
3567 return NULL;
3568 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3569
3570 if (!vectype1 || !vectype2
3571 || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
3572 return NULL;
3573
3574 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3575
3576 if (load)
3577 {
3578 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3579 pattern_stmt
3580 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
3581 gimple_call_arg (last_stmt, 0),
3582 gimple_call_arg (last_stmt, 1),
3583 tmp);
3584 gimple_call_set_lhs (pattern_stmt, lhs);
3585 }
3586 else
3587 pattern_stmt
3588 = gimple_build_call_internal (IFN_MASK_STORE, 4,
3589 gimple_call_arg (last_stmt, 0),
3590 gimple_call_arg (last_stmt, 1),
3591 tmp,
3592 gimple_call_arg (last_stmt, 3));
3593
3594
3595 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3596 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3597 STMT_VINFO_DATA_REF (pattern_stmt_info)
3598 = STMT_VINFO_DATA_REF (stmt_vinfo);
3599 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3600 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3601 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3602 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3603 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3604 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3605 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3606 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3607 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3608
3609 *type_out = vectype1;
3610 *type_in = vectype1;
3611 stmts->safe_push (last_stmt);
3612 if (dump_enabled_p ())
3613 dump_printf_loc (MSG_NOTE, vect_location,
3614 "vect_recog_mask_conversion_pattern: detected:\n");
3615
3616 return pattern_stmt;
3617 }
3618
3619 if (!is_gimple_assign (last_stmt))
3620 return NULL;
3621
3622 lhs = gimple_assign_lhs (last_stmt);
3623 rhs1 = gimple_assign_rhs1 (last_stmt);
3624 rhs_code = gimple_assign_rhs_code (last_stmt);
3625
3626 /* Check for cond expression requiring mask conversion. */
3627 if (rhs_code == COND_EXPR)
3628 {
3629 /* vect_recog_mixed_size_cond_pattern could apply.
3630 Do nothing then. */
3631 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
3632 return NULL;
3633
3634 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3635
3636 if (TREE_CODE (rhs1) == SSA_NAME)
3637 {
3638 rhs1_type = search_type_for_mask (rhs1, vinfo);
3639 if (!rhs1_type)
3640 return NULL;
3641 }
3642 else
3643 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
3644
3645 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3646
3647 if (!vectype1 || !vectype2
3648 || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
3649 return NULL;
3650
3651 /* If rhs1 is a comparison we need to move it into a
3652 separate statement. */
3653 if (TREE_CODE (rhs1) != SSA_NAME)
3654 {
3655 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
3656 pattern_stmt = gimple_build_assign (tmp, rhs1);
3657 rhs1 = tmp;
3658
3659 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3660 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3661 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
3662 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3663 }
3664
3665 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3666
3667 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3668 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
3669 gimple_assign_rhs2 (last_stmt),
3670 gimple_assign_rhs3 (last_stmt));
3671
3672 *type_out = vectype1;
3673 *type_in = vectype1;
3674 stmts->safe_push (last_stmt);
3675 if (dump_enabled_p ())
3676 dump_printf_loc (MSG_NOTE, vect_location,
3677 "vect_recog_mask_conversion_pattern: detected:\n");
3678
3679 return pattern_stmt;
3680 }
3681
3682 /* Now check for binary boolean operations requiring conversion for
3683 one of operands. */
3684 if (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE)
3685 return NULL;
3686
3687 if (rhs_code != BIT_IOR_EXPR
3688 && rhs_code != BIT_XOR_EXPR
3689 && rhs_code != BIT_AND_EXPR)
3690 return NULL;
3691
3692 rhs2 = gimple_assign_rhs2 (last_stmt);
3693
3694 rhs1_type = search_type_for_mask (rhs1, vinfo);
3695 rhs2_type = search_type_for_mask (rhs2, vinfo);
3696
3697 if (!rhs1_type || !rhs2_type
3698 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
3699 return NULL;
3700
3701 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
3702 {
3703 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
3704 if (!vectype1)
3705 return NULL;
3706 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
3707 }
3708 else
3709 {
3710 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
3711 if (!vectype1)
3712 return NULL;
3713 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3714 }
3715
3716 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3717 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
3718
3719 *type_out = vectype1;
3720 *type_in = vectype1;
3721 stmts->safe_push (last_stmt);
3722 if (dump_enabled_p ())
3723 dump_printf_loc (MSG_NOTE, vect_location,
3724 "vect_recog_mask_conversion_pattern: detected:\n");
3725
3726 return pattern_stmt;
3727 }
3728
3729
3730 /* Mark statements that are involved in a pattern. */
3731
3732 static inline void
3733 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
3734 tree pattern_vectype)
3735 {
3736 stmt_vec_info pattern_stmt_info, def_stmt_info;
3737 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
3738 vec_info *vinfo = orig_stmt_info->vinfo;
3739 gimple *def_stmt;
3740
3741 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
3742 if (pattern_stmt_info == NULL)
3743 {
3744 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3745 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3746 }
3747 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
3748
3749 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3750 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
3751 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
3752 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3753 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3754 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
3755 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3756 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3757 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
3758 {
3759 gimple_stmt_iterator si;
3760 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3761 !gsi_end_p (si); gsi_next (&si))
3762 {
3763 def_stmt = gsi_stmt (si);
3764 def_stmt_info = vinfo_for_stmt (def_stmt);
3765 if (def_stmt_info == NULL)
3766 {
3767 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3768 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3769 }
3770 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3771 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
3772 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
3773 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3774 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
3775 }
3776 }
3777 }
3778
3779 /* Function vect_pattern_recog_1
3780
3781 Input:
3782 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3783 computation pattern.
3784 STMT: A stmt from which the pattern search should start.
3785
3786 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3787 expression that computes the same functionality and can be used to
3788 replace the sequence of stmts that are involved in the pattern.
3789
3790 Output:
3791 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3792 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3793 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3794 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3795 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3796 to the available target pattern.
3797
3798 This function also does some bookkeeping, as explained in the documentation
3799 for vect_recog_pattern. */
3800
3801 static bool
3802 vect_pattern_recog_1 (vect_recog_func *recog_func,
3803 gimple_stmt_iterator si,
3804 vec<gimple *> *stmts_to_replace)
3805 {
3806 gimple *stmt = gsi_stmt (si), *pattern_stmt;
3807 stmt_vec_info stmt_info;
3808 loop_vec_info loop_vinfo;
3809 tree pattern_vectype;
3810 tree type_in, type_out;
3811 enum tree_code code;
3812 int i;
3813 gimple *next;
3814
3815 stmts_to_replace->truncate (0);
3816 stmts_to_replace->quick_push (stmt);
3817 pattern_stmt = recog_func->fn (stmts_to_replace, &type_in, &type_out);
3818 if (!pattern_stmt)
3819 return false;
3820
3821 stmt = stmts_to_replace->last ();
3822 stmt_info = vinfo_for_stmt (stmt);
3823 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3824
3825 if (VECTOR_BOOLEAN_TYPE_P (type_in)
3826 || VECTOR_MODE_P (TYPE_MODE (type_in)))
3827 {
3828 /* No need to check target support (already checked by the pattern
3829 recognition function). */
3830 pattern_vectype = type_out ? type_out : type_in;
3831 }
3832 else
3833 {
3834 machine_mode vec_mode;
3835 enum insn_code icode;
3836 optab optab;
3837
3838 /* Check target support */
3839 type_in = get_vectype_for_scalar_type (type_in);
3840 if (!type_in)
3841 return false;
3842 if (type_out)
3843 type_out = get_vectype_for_scalar_type (type_out);
3844 else
3845 type_out = type_in;
3846 if (!type_out)
3847 return false;
3848 pattern_vectype = type_out;
3849
3850 if (is_gimple_assign (pattern_stmt))
3851 code = gimple_assign_rhs_code (pattern_stmt);
3852 else
3853 {
3854 gcc_assert (is_gimple_call (pattern_stmt));
3855 code = CALL_EXPR;
3856 }
3857
3858 optab = optab_for_tree_code (code, type_in, optab_default);
3859 vec_mode = TYPE_MODE (type_in);
3860 if (!optab
3861 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3862 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3863 return false;
3864 }
3865
3866 /* Found a vectorizable pattern. */
3867 if (dump_enabled_p ())
3868 {
3869 dump_printf_loc (MSG_NOTE, vect_location,
3870 "%s pattern recognized: ", recog_func->name);
3871 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3872 }
3873
3874 /* Mark the stmts that are involved in the pattern. */
3875 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3876
3877 /* Patterns cannot be vectorized using SLP, because they change the order of
3878 computation. */
3879 if (loop_vinfo)
3880 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3881 if (next == stmt)
3882 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3883
3884 /* It is possible that additional pattern stmts are created and inserted in
3885 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3886 relevant statements. */
3887 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3888 && (unsigned) i < (stmts_to_replace->length () - 1);
3889 i++)
3890 {
3891 stmt_info = vinfo_for_stmt (stmt);
3892 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3893 if (dump_enabled_p ())
3894 {
3895 dump_printf_loc (MSG_NOTE, vect_location,
3896 "additional pattern stmt: ");
3897 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3898 }
3899
3900 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3901 }
3902
3903 return true;
3904 }
3905
3906
3907 /* Function vect_pattern_recog
3908
3909 Input:
3910 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3911 computation idioms.
3912
3913 Output - for each computation idiom that is detected we create a new stmt
3914 that provides the same functionality and that can be vectorized. We
3915 also record some information in the struct_stmt_info of the relevant
3916 stmts, as explained below:
3917
3918 At the entry to this function we have the following stmts, with the
3919 following initial value in the STMT_VINFO fields:
3920
3921 stmt in_pattern_p related_stmt vec_stmt
3922 S1: a_i = .... - - -
3923 S2: a_2 = ..use(a_i).. - - -
3924 S3: a_1 = ..use(a_2).. - - -
3925 S4: a_0 = ..use(a_1).. - - -
3926 S5: ... = ..use(a_0).. - - -
3927
3928 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3929 represented by a single stmt. We then:
3930 - create a new stmt S6 equivalent to the pattern (the stmt is not
3931 inserted into the code)
3932 - fill in the STMT_VINFO fields as follows:
3933
3934 in_pattern_p related_stmt vec_stmt
3935 S1: a_i = .... - - -
3936 S2: a_2 = ..use(a_i).. - - -
3937 S3: a_1 = ..use(a_2).. - - -
3938 S4: a_0 = ..use(a_1).. true S6 -
3939 '---> S6: a_new = .... - S4 -
3940 S5: ... = ..use(a_0).. - - -
3941
3942 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3943 to each other through the RELATED_STMT field).
3944
3945 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3946 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3947 remain irrelevant unless used by stmts other than S4.
3948
3949 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3950 (because they are marked as irrelevant). It will vectorize S6, and record
3951 a pointer to the new vector stmt VS6 from S6 (as usual).
3952 S4 will be skipped, and S5 will be vectorized as usual:
3953
3954 in_pattern_p related_stmt vec_stmt
3955 S1: a_i = .... - - -
3956 S2: a_2 = ..use(a_i).. - - -
3957 S3: a_1 = ..use(a_2).. - - -
3958 > VS6: va_new = .... - - -
3959 S4: a_0 = ..use(a_1).. true S6 VS6
3960 '---> S6: a_new = .... - S4 VS6
3961 > VS5: ... = ..vuse(va_new).. - - -
3962 S5: ... = ..use(a_0).. - - -
3963
3964 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3965 elsewhere), and we'll end up with:
3966
3967 VS6: va_new = ....
3968 VS5: ... = ..vuse(va_new)..
3969
3970 In case of more than one pattern statements, e.g., widen-mult with
3971 intermediate type:
3972
3973 S1 a_t = ;
3974 S2 a_T = (TYPE) a_t;
3975 '--> S3: a_it = (interm_type) a_t;
3976 S4 prod_T = a_T * CONST;
3977 '--> S5: prod_T' = a_it w* CONST;
3978
3979 there may be other users of a_T outside the pattern. In that case S2 will
3980 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3981 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3982 be recorded in S3. */
3983
3984 void
3985 vect_pattern_recog (vec_info *vinfo)
3986 {
3987 struct loop *loop;
3988 basic_block *bbs;
3989 unsigned int nbbs;
3990 gimple_stmt_iterator si;
3991 unsigned int i, j;
3992 auto_vec<gimple *, 1> stmts_to_replace;
3993 gimple *stmt;
3994
3995 if (dump_enabled_p ())
3996 dump_printf_loc (MSG_NOTE, vect_location,
3997 "=== vect_pattern_recog ===\n");
3998
3999 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4000 {
4001 loop = LOOP_VINFO_LOOP (loop_vinfo);
4002 bbs = LOOP_VINFO_BBS (loop_vinfo);
4003 nbbs = loop->num_nodes;
4004
4005 /* Scan through the loop stmts, applying the pattern recognition
4006 functions starting at each stmt visited: */
4007 for (i = 0; i < nbbs; i++)
4008 {
4009 basic_block bb = bbs[i];
4010 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4011 {
4012 /* Scan over all generic vect_recog_xxx_pattern functions. */
4013 for (j = 0; j < NUM_PATTERNS; j++)
4014 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4015 &stmts_to_replace))
4016 break;
4017 }
4018 }
4019 }
4020 else
4021 {
4022 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4023 for (si = bb_vinfo->region_begin;
4024 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4025 {
4026 if ((stmt = gsi_stmt (si))
4027 && vinfo_for_stmt (stmt)
4028 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
4029 continue;
4030
4031 /* Scan over all generic vect_recog_xxx_pattern functions. */
4032 for (j = 0; j < NUM_PATTERNS; j++)
4033 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4034 &stmts_to_replace))
4035 break;
4036 }
4037 }
4038 }