gcc/tree-vect-patterns.c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2019 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
49 #include "tree-vector-builder.h"
50 #include "vec-perm-indices.h"
51
52 /* Return true if we have a useful VR_RANGE range for VAR, storing it
53 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
54
55 static bool
56 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
57 {
58 value_range_kind vr_type = get_range_info (var, min_value, max_value);
59 wide_int nonzero = get_nonzero_bits (var);
60 signop sgn = TYPE_SIGN (TREE_TYPE (var));
61 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
62 nonzero, sgn) == VR_RANGE)
63 {
64 if (dump_enabled_p ())
65 {
66 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
67 dump_printf (MSG_NOTE, " has range [");
68 dump_hex (MSG_NOTE, *min_value);
69 dump_printf (MSG_NOTE, ", ");
70 dump_hex (MSG_NOTE, *max_value);
71 dump_printf (MSG_NOTE, "]\n");
72 }
73 return true;
74 }
75 else
76 {
77 if (dump_enabled_p ())
78 {
79 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
80 dump_printf (MSG_NOTE, " has no range info\n");
81 }
82 return false;
83 }
84 }
85
86 /* Report that we've found an instance of pattern NAME in
87 statement STMT. */
88
89 static void
90 vect_pattern_detected (const char *name, gimple *stmt)
91 {
92 if (dump_enabled_p ())
93 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
94 }
95
96 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
97 return the pattern statement's stmt_vec_info. Set its vector type to
98 VECTYPE if it doesn't have one already. */
99
100 static stmt_vec_info
101 vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
102 tree vectype)
103 {
104 vec_info *vinfo = orig_stmt_info->vinfo;
105 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
106 if (pattern_stmt_info == NULL)
107 pattern_stmt_info = orig_stmt_info->vinfo->add_stmt (pattern_stmt);
108 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
109
110 pattern_stmt_info->pattern_stmt_p = true;
111 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
112 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
113 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
114 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
115 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
116 return pattern_stmt_info;
117 }
118
119 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
120 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
121 have one already. */
122
123 static void
124 vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
125 tree vectype)
126 {
127 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
128 STMT_VINFO_RELATED_STMT (orig_stmt_info)
129 = vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
130 }
131
132 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
133 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
134 be different from the vector type of the final pattern statement. */
135
136 static inline void
137 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
138 tree vectype = NULL_TREE)
139 {
140 vec_info *vinfo = stmt_info->vinfo;
141 if (vectype)
142 {
143 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
144 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
145 }
146 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
147 new_stmt);
148 }
149
150 /* The caller wants to perform new operations on vect_external variable
151 VAR, so that the result of the operations would also be vect_external.
152 Return the edge on which the operations can be performed, if one exists.
153 Return null if the operations should instead be treated as part of
154 the pattern that needs them. */
155
156 static edge
157 vect_get_external_def_edge (vec_info *vinfo, tree var)
158 {
159 edge e = NULL;
160 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
161 {
162 e = loop_preheader_edge (loop_vinfo->loop);
163 if (!SSA_NAME_IS_DEFAULT_DEF (var))
164 {
165 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
166 if (bb == NULL
167 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
168 e = NULL;
169 }
170 }
171 return e;
172 }
173
174 /* Return true if the target supports a vector version of CODE,
175 where CODE is known to map to a direct optab. ITYPE specifies
176 the type of (some of) the scalar inputs and OTYPE specifies the
177 type of the scalar result.
178
179 If CODE allows the inputs and outputs to have different types
180 (such as for WIDEN_SUM_EXPR), it is the input mode rather
181 than the output mode that determines the appropriate target pattern.
182 Operand 0 of the target pattern then specifies the mode that the output
183 must have.
184
185 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
186 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
187 is nonnull. */
188
189 static bool
190 vect_supportable_direct_optab_p (tree otype, tree_code code,
191 tree itype, tree *vecotype_out,
192 tree *vecitype_out = NULL)
193 {
194 tree vecitype = get_vectype_for_scalar_type (itype);
195 if (!vecitype)
196 return false;
197
198 tree vecotype = get_vectype_for_scalar_type (otype);
199 if (!vecotype)
200 return false;
201
202 optab optab = optab_for_tree_code (code, vecitype, optab_default);
203 if (!optab)
204 return false;
205
206 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
207 if (icode == CODE_FOR_nothing
208 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
209 return false;
210
211 *vecotype_out = vecotype;
212 if (vecitype_out)
213 *vecitype_out = vecitype;
214 return true;
215 }
216
217 /* Round bit precision PRECISION up to a full element. */
218
219 static unsigned int
220 vect_element_precision (unsigned int precision)
221 {
222 precision = 1 << ceil_log2 (precision);
223 return MAX (precision, BITS_PER_UNIT);
224 }
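/* For example (illustrative, assuming BITS_PER_UNIT == 8): a precision of 12
   rounds up to 16, while vect_element_precision (3) yields 8, since anything
   narrower than a byte is rounded up to a full byte.  */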
225
226 /* If OP is defined by a statement that's being considered for vectorization,
227 return information about that statement, otherwise return NULL. */
228
229 static stmt_vec_info
230 vect_get_internal_def (vec_info *vinfo, tree op)
231 {
232 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
233 if (def_stmt_info
234 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
235 return def_stmt_info;
236 return NULL;
237 }
238
239 /* Check whether NAME, an ssa-name used in STMT_VINFO,
240 is a result of a type promotion, such that:
241 DEF_STMT: NAME = NOP (name0)
242 If CHECK_SIGN is TRUE, check that either both types are signed or both are
243 unsigned. */
244
245 static bool
246 type_conversion_p (tree name, stmt_vec_info stmt_vinfo, bool check_sign,
247 tree *orig_type, gimple **def_stmt, bool *promotion)
248 {
249 tree type = TREE_TYPE (name);
250 tree oprnd0;
251 enum vect_def_type dt;
252
253 stmt_vec_info def_stmt_info;
254 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, &def_stmt_info,
255 def_stmt))
256 return false;
257
258 if (dt != vect_internal_def
259 && dt != vect_external_def && dt != vect_constant_def)
260 return false;
261
262 if (!*def_stmt)
263 return false;
264
265 if (!is_gimple_assign (*def_stmt))
266 return false;
267
268 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
269 return false;
270
271 oprnd0 = gimple_assign_rhs1 (*def_stmt);
272
273 *orig_type = TREE_TYPE (oprnd0);
274 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
275 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
276 return false;
277
278 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
279 *promotion = true;
280 else
281 *promotion = false;
282
283 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
284 return false;
285
286 return true;
287 }
288
289 /* Holds information about an input operand after some sign changes
290 and type promotions have been peeled away. */
291 class vect_unpromoted_value {
292 public:
293 vect_unpromoted_value ();
294
295 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
296
297 /* The value obtained after peeling away zero or more casts. */
298 tree op;
299
300 /* The type of OP. */
301 tree type;
302
303 /* The definition type of OP. */
304 vect_def_type dt;
305
306 /* If OP is the result of peeling at least one cast, and if the cast
307 of OP itself is a vectorizable statement, CASTER identifies that
308 statement, otherwise it is null. */
309 stmt_vec_info caster;
310 };
311
312 inline vect_unpromoted_value::vect_unpromoted_value ()
313 : op (NULL_TREE),
314 type (NULL_TREE),
315 dt (vect_uninitialized_def),
316 caster (NULL)
317 {
318 }
319
320 /* Set the operand to OP_IN, its definition type to DT_IN, and the
321 statement that casts it to CASTER_IN. */
322
323 inline void
324 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
325 stmt_vec_info caster_in)
326 {
327 op = op_in;
328 type = TREE_TYPE (op);
329 dt = dt_in;
330 caster = caster_in;
331 }
332
333 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
334 to reach some vectorizable inner operand OP', continuing as long as it
335 is possible to convert OP' back to OP using a possible sign change
336 followed by a possible promotion P. Return this OP', or null if OP is
337 not a vectorizable SSA name. If there is a promotion P, describe its
338 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
339 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
340 have more than one user.
341
342 A successful return means that it is possible to go from OP' to OP
343 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
344 whereas the cast from UNPROM to OP might be a promotion, a sign
345 change, or a nop.
346
347 E.g. say we have:
348
349 signed short *ptr = ...;
350 signed short C = *ptr;
351 unsigned short B = (unsigned short) C; // sign change
352 signed int A = (signed int) B; // unsigned promotion
353 ...possible other uses of A...
354 unsigned int OP = (unsigned int) A; // sign change
355
356 In this case it's possible to go directly from C to OP using:
357
358 OP = (unsigned int) (unsigned short) C;
359 +------------+ +--------------+
360 promotion sign change
361
362 so OP' would be C. The input to the promotion is B, so UNPROM
363 would describe B. */
364
365 static tree
366 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
367 vect_unpromoted_value *unprom,
368 bool *single_use_p = NULL)
369 {
370 tree res = NULL_TREE;
371 tree op_type = TREE_TYPE (op);
372 unsigned int orig_precision = TYPE_PRECISION (op_type);
373 unsigned int min_precision = orig_precision;
374 stmt_vec_info caster = NULL;
375 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
376 {
377 /* See whether OP is simple enough to vectorize. */
378 stmt_vec_info def_stmt_info;
379 gimple *def_stmt;
380 vect_def_type dt;
381 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
382 break;
383
384 /* If OP is the input of a demotion, skip over it to see whether
385 OP is itself the result of a promotion. If so, the combined
386 effect of the promotion and the demotion might fit the required
387 pattern, otherwise neither operation fits.
388
389 This copes with cases such as the result of an arithmetic
390 operation being truncated before being stored, and where that
391 arithmetic operation has been recognized as an over-widened one. */
392 if (TYPE_PRECISION (op_type) <= min_precision)
393 {
394 /* Use OP as the UNPROM described above if we haven't yet
395 found a promotion, or if using the new input preserves the
396 sign of the previous promotion. */
397 if (!res
398 || TYPE_PRECISION (unprom->type) == orig_precision
399 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
400 {
401 unprom->set_op (op, dt, caster);
402 min_precision = TYPE_PRECISION (op_type);
403 }
404 /* Stop if we've already seen a promotion and if this
405 conversion does more than change the sign. */
406 else if (TYPE_PRECISION (op_type)
407 != TYPE_PRECISION (unprom->type))
408 break;
409
410 /* The sequence now extends to OP. */
411 res = op;
412 }
413
414 /* See whether OP is defined by a cast. Record it as CASTER if
415 the cast is potentially vectorizable. */
416 if (!def_stmt)
417 break;
418 caster = def_stmt_info;
419
420 /* Ignore pattern statements, since we don't link uses for them. */
421 if (caster
422 && single_use_p
423 && !STMT_VINFO_RELATED_STMT (caster)
424 && !has_single_use (res))
425 *single_use_p = false;
426
427 gassign *assign = dyn_cast <gassign *> (def_stmt);
428 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
429 break;
430
431 /* Continue with the input to the cast. */
432 op = gimple_assign_rhs1 (def_stmt);
433 op_type = TREE_TYPE (op);
434 }
435 return res;
436 }
437
438 /* OP is an integer operand to an operation that returns TYPE, and we
439 want to treat the operation as a widening one. So far we can treat
440 it as widening from *COMMON_TYPE.
441
442 Return true if OP is suitable for such a widening operation,
443 either widening from *COMMON_TYPE or from some supertype of it.
444 Update *COMMON_TYPE to the supertype in the latter case.
445
446 SHIFT_P is true if OP is a shift amount. */
447
448 static bool
449 vect_joust_widened_integer (tree type, bool shift_p, tree op,
450 tree *common_type)
451 {
452 /* Calculate the minimum precision required by OP, without changing
453 the sign of either operand. */
454 unsigned int precision;
455 if (shift_p)
456 {
457 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
458 return false;
459 precision = TREE_INT_CST_LOW (op);
460 }
461 else
462 {
463 precision = wi::min_precision (wi::to_widest (op),
464 TYPE_SIGN (*common_type));
465 if (precision * 2 > TYPE_PRECISION (type))
466 return false;
467 }
468
469 /* If OP requires a wider type, switch to that type. The checks
470 above ensure that this is still narrower than the result. */
471 precision = vect_element_precision (precision);
472 if (TYPE_PRECISION (*common_type) < precision)
473 *common_type = build_nonstandard_integer_type
474 (precision, TYPE_UNSIGNED (*common_type));
475 return true;
476 }
477
478 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
479 is narrower than TYPE, storing the supertype in *COMMON_TYPE if so. */
480
481 static bool
482 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
483 {
484 if (types_compatible_p (*common_type, new_type))
485 return true;
486
487 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
488 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
489 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
490 return true;
491
492 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
493 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
494 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
495 {
496 *common_type = new_type;
497 return true;
498 }
499
500 /* We have mismatched signs, with the signed type being
501 no wider than the unsigned type. In this case we need
502 a wider signed type. */
503 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
504 TYPE_PRECISION (new_type));
505 precision *= 2;
506 if (precision * 2 > TYPE_PRECISION (type))
507 return false;
508
509 *common_type = build_nonstandard_integer_type (precision, false);
510 return true;
511 }
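/* Illustrative worked case (not from the sources): with *COMMON_TYPE signed
   char, NEW_TYPE unsigned char and a 32-bit result TYPE, neither 8-bit type
   can hold all values of the other, so we reach the mismatched-signs case:
   precision = MAX (8, 8) * 2 = 16, and since 16 * 2 = 32 is not greater than
   32, the function succeeds and *COMMON_TYPE becomes a 16-bit signed type.  */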
512
513 /* Check whether STMT_INFO can be viewed as a tree of integer operations
514 in which each node either performs CODE or WIDENED_CODE, and where
515 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
516 specifies the maximum number of leaf operands. SHIFT_P says whether
517 CODE and WIDENED_CODE are some sort of shift.
518
519 If STMT_INFO is such a tree, return the number of leaf operands
520 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
521 to a type that (a) is narrower than the result of STMT_INFO and
522 (b) can hold all leaf operand values.
523
524 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
525 exists. */
526
527 static unsigned int
528 vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
529 tree_code widened_code, bool shift_p,
530 unsigned int max_nops,
531 vect_unpromoted_value *unprom, tree *common_type)
532 {
533 /* Check for an integer operation with the right code. */
534 vec_info *vinfo = stmt_info->vinfo;
535 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
536 if (!assign)
537 return 0;
538
539 tree_code rhs_code = gimple_assign_rhs_code (assign);
540 if (rhs_code != code && rhs_code != widened_code)
541 return 0;
542
543 tree type = gimple_expr_type (assign);
544 if (!INTEGRAL_TYPE_P (type))
545 return 0;
546
547 /* Assume that both operands will be leaf operands. */
548 max_nops -= 2;
549
550 /* Check the operands. */
551 unsigned int next_op = 0;
552 for (unsigned int i = 0; i < 2; ++i)
553 {
554 vect_unpromoted_value *this_unprom = &unprom[next_op];
555 unsigned int nops = 1;
556 tree op = gimple_op (assign, i + 1);
557 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
558 {
559 /* We already have a common type from earlier operands.
560 Update it to account for OP. */
561 this_unprom->set_op (op, vect_constant_def);
562 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
563 return 0;
564 }
565 else
566 {
567 /* Only allow shifts by constants. */
568 if (shift_p && i == 1)
569 return 0;
570
571 if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
572 this_unprom))
573 return 0;
574
575 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
576 {
577 /* The operand isn't widened. If STMT_INFO has the code
578 for an unwidened operation, recursively check whether
579 this operand is a node of the tree. */
580 if (rhs_code != code
581 || max_nops == 0
582 || this_unprom->dt != vect_internal_def)
583 return 0;
584
585 /* Give back the leaf slot allocated above now that we're
586 not treating this as a leaf operand. */
587 max_nops += 1;
588
589 /* Recursively process the definition of the operand. */
590 stmt_vec_info def_stmt_info
591 = vinfo->lookup_def (this_unprom->op);
592 nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
593 shift_p, max_nops, this_unprom,
594 common_type);
595 if (nops == 0)
596 return 0;
597
598 max_nops -= nops;
599 }
600 else
601 {
602 /* Make sure that the operand is narrower than the result. */
603 if (TYPE_PRECISION (this_unprom->type) * 2
604 > TYPE_PRECISION (type))
605 return 0;
606
607 /* Update COMMON_TYPE for the new operand. */
608 if (i == 0)
609 *common_type = this_unprom->type;
610 else if (!vect_joust_widened_type (type, this_unprom->type,
611 common_type))
612 return 0;
613 }
614 }
615 next_op += nops;
616 }
617 return next_op;
618 }
619
620 /* Helper to return a new temporary of type TYPE, defined by STMT, for use in a pattern.
621 If STMT is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
622
623 static tree
624 vect_recog_temp_ssa_var (tree type, gimple *stmt)
625 {
626 return make_temp_ssa_name (type, stmt, "patt");
627 }
628
629 /* STMT2_INFO describes a type conversion that could be split into STMT1
630 followed by a version of STMT2_INFO that takes NEW_RHS as its first
631 input. Try to do this using pattern statements, returning true on
632 success. */
633
634 static bool
635 vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
636 gimple *stmt1, tree vectype)
637 {
638 if (is_pattern_stmt_p (stmt2_info))
639 {
640 /* STMT2_INFO is part of a pattern. Get the statement to which
641 the pattern is attached. */
642 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
643 vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);
644
645 if (dump_enabled_p ())
646 dump_printf_loc (MSG_NOTE, vect_location,
647 "Splitting pattern statement: %G", stmt2_info->stmt);
648
649 /* Since STMT2_INFO is a pattern statement, we can change it
650 in-situ without worrying about changing the code for the
651 containing block. */
652 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
653
654 if (dump_enabled_p ())
655 {
656 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
657 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
658 stmt2_info->stmt);
659 }
660
661 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
662 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
663 /* STMT2_INFO is the actual pattern statement. Add STMT1
664 to the end of the definition sequence. */
665 gimple_seq_add_stmt_without_update (def_seq, stmt1);
666 else
667 {
668 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
669 before it. */
670 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
671 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
672 }
673 return true;
674 }
675 else
676 {
677 /* STMT2_INFO doesn't yet have a pattern. Try to create a
678 two-statement pattern now. */
679 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
680 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
681 tree lhs_vectype = get_vectype_for_scalar_type (lhs_type);
682 if (!lhs_vectype)
683 return false;
684
685 if (dump_enabled_p ())
686 dump_printf_loc (MSG_NOTE, vect_location,
687 "Splitting statement: %G", stmt2_info->stmt);
688
689 /* Add STMT1 as a singleton pattern definition sequence. */
690 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
691 vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
692 gimple_seq_add_stmt_without_update (def_seq, stmt1);
693
694 /* Build the second of the two pattern statements. */
695 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
696 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
697 vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);
698
699 if (dump_enabled_p ())
700 {
701 dump_printf_loc (MSG_NOTE, vect_location,
702 "into pattern statements: %G", stmt1);
703 dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
704 }
705
706 return true;
707 }
708 }
709
710 /* Convert UNPROM to TYPE and return the result, adding new statements
711 to STMT_INFO's pattern definition statements if no better way is
712 available. VECTYPE is the vector form of TYPE. */
713
714 static tree
715 vect_convert_input (stmt_vec_info stmt_info, tree type,
716 vect_unpromoted_value *unprom, tree vectype)
717 {
718 /* Check for a no-op conversion. */
719 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
720 return unprom->op;
721
722 /* Allow the caller to create constant vect_unpromoted_values. */
723 if (TREE_CODE (unprom->op) == INTEGER_CST)
724 return wide_int_to_tree (type, wi::to_widest (unprom->op));
725
726 tree input = unprom->op;
727 if (unprom->caster)
728 {
729 tree lhs = gimple_get_lhs (unprom->caster->stmt);
730 tree lhs_type = TREE_TYPE (lhs);
731
732 /* If the result of the existing cast is the right width, use it
733 instead of the source of the cast. */
734 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
735 input = lhs;
736 /* If the precision we want is between the source and result
737 precisions of the existing cast, try splitting the cast into
738 two and tapping into a mid-way point. */
739 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
740 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
741 {
742 /* In order to preserve the semantics of the original cast,
743 give the mid-way point the same signedness as the input value.
744
745 It would be possible to use a signed type here instead if
746 TYPE is signed and UNPROM->TYPE is unsigned, but that would
747 make the sign of the midtype sensitive to the order in
748 which we process the statements, since the signedness of
749 TYPE is the signedness required by just one of possibly
750 many users. Also, unsigned promotions are usually as cheap
751 as or cheaper than signed ones, so it's better to keep an
752 unsigned promotion. */
753 tree midtype = build_nonstandard_integer_type
754 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
755 tree vec_midtype = get_vectype_for_scalar_type (midtype);
756 if (vec_midtype)
757 {
758 input = vect_recog_temp_ssa_var (midtype, NULL);
759 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
760 unprom->op);
761 if (!vect_split_statement (unprom->caster, input, new_stmt,
762 vec_midtype))
763 append_pattern_def_seq (stmt_info, new_stmt, vec_midtype);
764 }
765 }
766
767 /* See if we can reuse an existing result. */
768 if (types_compatible_p (type, TREE_TYPE (input)))
769 return input;
770 }
771
772 /* We need a new conversion statement. */
773 tree new_op = vect_recog_temp_ssa_var (type, NULL);
774 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
775
776 /* If OP is an external value, see if we can insert the new statement
777 on an incoming edge. */
778 if (input == unprom->op && unprom->dt == vect_external_def)
779 if (edge e = vect_get_external_def_edge (stmt_info->vinfo, input))
780 {
781 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
782 gcc_assert (!new_bb);
783 return new_op;
784 }
785
786 /* As a (common) last resort, add the statement to the pattern itself. */
787 append_pattern_def_seq (stmt_info, new_stmt, vectype);
788 return new_op;
789 }
790
791 /* Invoke vect_convert_input for N elements of UNPROM and store the
792 result in the corresponding elements of RESULT. */
793
794 static void
795 vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
796 tree *result, tree type, vect_unpromoted_value *unprom,
797 tree vectype)
798 {
799 for (unsigned int i = 0; i < n; ++i)
800 {
801 unsigned int j;
802 for (j = 0; j < i; ++j)
803 if (unprom[j].op == unprom[i].op)
804 break;
805 if (j < i)
806 result[i] = result[j];
807 else
808 result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
809 }
810 }
811
812 /* The caller has created a (possibly empty) sequence of pattern definition
813 statements followed by a single statement PATTERN_STMT. Cast the result
814 of this final statement to TYPE. If a new statement is needed, add
815 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
816 and return the new statement, otherwise return PATTERN_STMT as-is.
817 VECITYPE is the vector form of PATTERN_STMT's result type. */
818
819 static gimple *
820 vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
821 tree vecitype)
822 {
823 tree lhs = gimple_get_lhs (pattern_stmt);
824 if (!types_compatible_p (type, TREE_TYPE (lhs)))
825 {
826 append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
827 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
828 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
829 }
830 return pattern_stmt;
831 }
832
833 /* Return true if STMT_VINFO describes a reduction for which reassociation
834 is allowed. If STMT_VINFO is part of a group, assume that it's part of
835 a reduction chain and optimistically assume that all statements
836 except the last allow reassociation. */
837
838 static bool
839 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
840 {
841 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
842 return (STMT_VINFO_REDUC_TYPE (STMT_VINFO_REDUC_DEF (stmt_vinfo))
843 != FOLD_LEFT_REDUCTION);
844 else
845 return REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL;
846 }
847
848 /* As above, but also require it to have code CODE and to be a reduction
849 in the outermost loop. When returning true, store the operands in
850 *OP0_OUT and *OP1_OUT. */
851
852 static bool
853 vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
854 tree *op0_out, tree *op1_out)
855 {
856 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
857 if (!loop_info)
858 return false;
859
860 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
861 if (!assign || gimple_assign_rhs_code (assign) != code)
862 return false;
863
864 /* We don't allow changing the order of the computation in the inner-loop
865 when doing outer-loop vectorization. */
866 class loop *loop = LOOP_VINFO_LOOP (loop_info);
867 if (loop && nested_in_vect_loop_p (loop, stmt_info))
868 return false;
869
870 if (!vect_reassociating_reduction_p (stmt_info))
871 return false;
872
873 *op0_out = gimple_assign_rhs1 (assign);
874 *op1_out = gimple_assign_rhs2 (assign);
875 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
876 std::swap (*op0_out, *op1_out);
877 return true;
878 }
879
880 /* Function vect_recog_dot_prod_pattern
881
882 Try to find the following pattern:
883
884 type x_t, y_t;
885 TYPE1 prod;
886 TYPE2 sum = init;
887 loop:
888 sum_0 = phi <init, sum_1>
889 S1 x_t = ...
890 S2 y_t = ...
891 S3 x_T = (TYPE1) x_t;
892 S4 y_T = (TYPE1) y_t;
893 S5 prod = x_T * y_T;
894 [S6 prod = (TYPE2) prod; #optional]
895 S7 sum_1 = prod + sum_0;
896
897 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
898 same size as 'TYPE1' or bigger. This is a special case of a reduction
899 computation.
900
901 Input:
902
903 * STMT_VINFO: The stmt from which the pattern search begins. In the
904 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
905 will be detected.
906
907 Output:
908
909 * TYPE_OUT: The type of the output of this pattern.
910
911 * Return value: A new stmt that will be used to replace the sequence of
912 stmts that constitute the pattern. In this case it will be:
913 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
914
915 Note: The dot-prod idiom is a widening reduction pattern that is
916 vectorized without preserving all the intermediate results. It
917 produces only N/2 (widened) results (by summing up pairs of
918 intermediate results) rather than all N results. Therefore, we
919 cannot allow this pattern when we want to get all the results and in
920 the correct order (as is the case when this computation is in an
921 inner-loop nested in an outer-loop that is being vectorized). */
922
923 static gimple *
924 vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
925 {
926 tree oprnd0, oprnd1;
927 gimple *last_stmt = stmt_vinfo->stmt;
928 vec_info *vinfo = stmt_vinfo->vinfo;
929 tree type, half_type;
930 gimple *pattern_stmt;
931 tree var;
932
933 /* Look for the following pattern
934 DX = (TYPE1) X;
935 DY = (TYPE1) Y;
936 DPROD = DX * DY;
937 DDPROD = (TYPE2) DPROD;
938 sum_1 = DDPROD + sum_0;
939 In which
940 - DX is double the size of X
941 - DY is double the size of Y
942 - DX, DY, DPROD all have the same type
943 - sum is the same size as DPROD or bigger
944 - sum has been recognized as a reduction variable.
945
946 This is equivalent to:
947 DPROD = X w* Y; #widen mult
948 sum_1 = DPROD w+ sum_0; #widen summation
949 or
950 DPROD = X w* Y; #widen mult
951 sum_1 = DPROD + sum_0; #summation
952 */
953
954 /* Starting from LAST_STMT, follow the defs of its uses in search
955 of the above pattern. */
956
957 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
958 &oprnd0, &oprnd1))
959 return NULL;
960
961 type = gimple_expr_type (last_stmt);
962
963 vect_unpromoted_value unprom_mult;
964 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
965
966 /* So far so good. Since last_stmt was detected as a (summation) reduction,
967 we know that oprnd1 is the reduction variable (defined by a loop-header
968 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
969 Left to check that oprnd0 is defined by a (widen_)mult_expr */
970 if (!oprnd0)
971 return NULL;
972
973 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
974 if (!mult_vinfo)
975 return NULL;
976
977 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
978 inside the loop (in case we are analyzing an outer-loop). */
979 vect_unpromoted_value unprom0[2];
980 if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
981 false, 2, unprom0, &half_type))
982 return NULL;
983
984 /* If there are two widening operations, make sure they agree on
985 the sign of the extension. */
986 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
987 && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
988 return NULL;
989
990 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
991
992 tree half_vectype;
993 if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
994 type_out, &half_vectype))
995 return NULL;
996
997 /* Get the inputs in the appropriate types. */
998 tree mult_oprnd[2];
999 vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
1000 unprom0, half_vectype);
1001
1002 var = vect_recog_temp_ssa_var (type, NULL);
1003 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1004 mult_oprnd[0], mult_oprnd[1], oprnd1);
1005
1006 return pattern_stmt;
1007 }
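/* Illustrative only (not part of the vectorizer): a source loop of the
   shape recognized above, with 'type' = short and TYPE1 = TYPE2 = int.
   The function name is made up for the example.  */

int
dot_prod_example (short *x, short *y, int n)
{
  int sum = 0;
  for (int i = 0; i < n; i++)
    /* S3-S5 and S7: widen, multiply, accumulate.  */
    sum += (int) x[i] * (int) y[i];
  return sum;
}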
1008
1009
1010 /* Function vect_recog_sad_pattern
1011
1012 Try to find the following Sum of Absolute Difference (SAD) pattern:
1013
1014 type x_t, y_t;
1015 signed TYPE1 diff, abs_diff;
1016 TYPE2 sum = init;
1017 loop:
1018 sum_0 = phi <init, sum_1>
1019 S1 x_t = ...
1020 S2 y_t = ...
1021 S3 x_T = (TYPE1) x_t;
1022 S4 y_T = (TYPE1) y_t;
1023 S5 diff = x_T - y_T;
1024 S6 abs_diff = ABS_EXPR <diff>;
1025 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1026 S8 sum_1 = abs_diff + sum_0;
1027
1028 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1029 same size as 'TYPE1' or bigger. This is a special case of a reduction
1030 computation.
1031
1032 Input:
1033
1034 * STMT_VINFO: The stmt from which the pattern search begins. In the
1035 example, when this function is called with S8, the pattern
1036 {S3,S4,S5,S6,S7,S8} will be detected.
1037
1038 Output:
1039
1040 * TYPE_OUT: The type of the output of this pattern.
1041
1042 * Return value: A new stmt that will be used to replace the sequence of
1043 stmts that constitute the pattern. In this case it will be:
1044 SAD_EXPR <x_t, y_t, sum_0>
1045 */
1046
1047 static gimple *
1048 vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1049 {
1050 gimple *last_stmt = stmt_vinfo->stmt;
1051 vec_info *vinfo = stmt_vinfo->vinfo;
1052 tree half_type;
1053
1054 /* Look for the following pattern
1055 DX = (TYPE1) X;
1056 DY = (TYPE1) Y;
1057 DDIFF = DX - DY;
1058 DAD = ABS_EXPR <DDIFF>;
1059 DAD = (TYPE2) DAD;
1060 sum_1 = DAD + sum_0;
1061 In which
1062 - DX is at least double the size of X
1063 - DY is at least double the size of Y
1064 - DX, DY, DDIFF, DAD all have the same type
1065 - sum is the same size as DAD or bigger
1066 - sum has been recognized as a reduction variable.
1067
1068 This is equivalent to:
1069 DDIFF = X w- Y; #widen sub
1070 DAD = ABS_EXPR <DDIFF>;
1071 sum_1 = DAD w+ sum_0; #widen summation
1072 or
1073 DDIFF = X w- Y; #widen sub
1074 DAD = ABS_EXPR <DDIFF>;
1075 sum_1 = DAD + sum_0; #summation
1076 */
1077
1078 /* Starting from LAST_STMT, follow the defs of its uses in search
1079 of the above pattern. */
1080
1081 tree plus_oprnd0, plus_oprnd1;
1082 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1083 &plus_oprnd0, &plus_oprnd1))
1084 return NULL;
1085
1086 tree sum_type = gimple_expr_type (last_stmt);
1087
1088 /* Any non-truncating sequence of conversions is OK here, since
1089 with a successful match, the result of the ABS(U) is known to fit
1090 within the nonnegative range of the result type. (It cannot be the
1091 negative of the minimum signed value due to the range of the widening
1092 MINUS_EXPR.) */
1093 vect_unpromoted_value unprom_abs;
1094 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1095 &unprom_abs);
1096
1097 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1098 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1099 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1100 Then check that plus_oprnd0 is defined by an abs_expr. */
1101
1102 if (!plus_oprnd0)
1103 return NULL;
1104
1105 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1106 if (!abs_stmt_vinfo)
1107 return NULL;
1108
1109 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1110 inside the loop (in case we are analyzing an outer-loop). */
1111 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1112 if (!abs_stmt
1113 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1114 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1115 return NULL;
1116
1117 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1118 tree abs_type = TREE_TYPE (abs_oprnd);
1119 if (TYPE_UNSIGNED (abs_type))
1120 return NULL;
1121
1122 /* Peel off conversions from the ABS input. This can involve sign
1123 changes (e.g. from an unsigned subtraction to a signed ABS input)
1124 or signed promotion, but it can't include unsigned promotion.
1125 (Note that ABS of an unsigned promotion should have been folded
1126 away before now anyway.) */
1127 vect_unpromoted_value unprom_diff;
1128 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1129 &unprom_diff);
1130 if (!abs_oprnd)
1131 return NULL;
1132 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1133 && TYPE_UNSIGNED (unprom_diff.type))
1134 return NULL;
1135
1136 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1137 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1138 if (!diff_stmt_vinfo)
1139 return NULL;
1140
1141 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1142 inside the loop (in case we are analyzing an outer-loop). */
1143 vect_unpromoted_value unprom[2];
1144 if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
1145 false, 2, unprom, &half_type))
1146 return NULL;
1147
1148 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1149
1150 tree half_vectype;
1151 if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR, half_type,
1152 type_out, &half_vectype))
1153 return NULL;
1154
1155 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1156 tree sad_oprnd[2];
1157 vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
1158 unprom, half_vectype);
1159
1160 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1161 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1162 sad_oprnd[1], plus_oprnd1);
1163
1164 return pattern_stmt;
1165 }
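/* Illustrative only: a sum-of-absolute-differences loop of the shape
   recognized above, with 'type' = unsigned char and TYPE1 = TYPE2 = int.
   The function name is made up for the example.  */

int
sad_example (unsigned char *x, unsigned char *y, int n)
{
  int sum = 0;
  for (int i = 0; i < n; i++)
    {
      int diff = x[i] - y[i];           /* S5: widening subtraction.  */
      sum += diff >= 0 ? diff : -diff;  /* S6/S8: ABS_EXPR, then accumulate.  */
    }
  return sum;
}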
1166
1167 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1168 so that it can be treated as though it had the form:
1169
1170 A_TYPE a;
1171 B_TYPE b;
1172 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1173 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1174 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1175 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1176 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1177
1178 Try to replace the pattern with:
1179
1180 A_TYPE a;
1181 B_TYPE b;
1182 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1183 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1184 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1185 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1186
1187 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1188
1189 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1190 name of the pattern being matched, for dump purposes. */
1191
1192 static gimple *
1193 vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
1194 tree_code orig_code, tree_code wide_code,
1195 bool shift_p, const char *name)
1196 {
1197 gimple *last_stmt = last_stmt_info->stmt;
1198
1199 vect_unpromoted_value unprom[2];
1200 tree half_type;
1201 if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
1202 shift_p, 2, unprom, &half_type))
1203 return NULL;
1204
1205 /* Pattern detected. */
1206 vect_pattern_detected (name, last_stmt);
1207
1208 tree type = gimple_expr_type (last_stmt);
1209 tree itype = type;
1210 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1211 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1212 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1213 TYPE_UNSIGNED (half_type));
1214
1215 /* Check target support */
1216 tree vectype = get_vectype_for_scalar_type (half_type);
1217 tree vecitype = get_vectype_for_scalar_type (itype);
1218 enum tree_code dummy_code;
1219 int dummy_int;
1220 auto_vec<tree> dummy_vec;
1221 if (!vectype
1222 || !vecitype
1223 || !supportable_widening_operation (wide_code, last_stmt_info,
1224 vecitype, vectype,
1225 &dummy_code, &dummy_code,
1226 &dummy_int, &dummy_vec))
1227 return NULL;
1228
1229 *type_out = get_vectype_for_scalar_type (type);
1230 if (!*type_out)
1231 return NULL;
1232
1233 tree oprnd[2];
1234 vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);
1235
1236 tree var = vect_recog_temp_ssa_var (itype, NULL);
1237 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1238 oprnd[0], oprnd[1]);
1239
1240 return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
1241 }
1242
1243 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1244 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1245
1246 static gimple *
1247 vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1248 {
1249 return vect_recog_widen_op_pattern (last_stmt_info, type_out, MULT_EXPR,
1250 WIDEN_MULT_EXPR, false,
1251 "vect_recog_widen_mult_pattern");
1252 }
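/* Illustrative only: a multiplication on promoted inputs that
   vect_recog_widen_mult_pattern can rewrite as WIDEN_MULT_EXPR, assuming
   int is twice the width of short on the target.  The function name is
   made up for the example.  */

void
widen_mult_example (short *a, short *b, int *out, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = (int) a[i] * (int) b[i];
}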
1253
1254 /* Function vect_recog_pow_pattern
1255
1256 Try to find the following pattern:
1257
1258 x = POW (y, N);
1259
1260 with POW being one of pow, powf, powi, powif and N being
1261 either 2 or 0.5.
1262
1263 Input:
1264
1265 * STMT_VINFO: The stmt from which the pattern search begins.
1266
1267 Output:
1268
1269 * TYPE_OUT: The type of the output of this pattern.
1270
1271 * Return value: A new stmt that will be used to replace the sequence of
1272 stmts that constitute the pattern. In this case it will be:
1273 x = x * x
1274 or
1275 x = sqrt (x)
1276 */
1277
1278 static gimple *
1279 vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1280 {
1281 gimple *last_stmt = stmt_vinfo->stmt;
1282 tree base, exp;
1283 gimple *stmt;
1284 tree var;
1285
1286 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1287 return NULL;
1288
1289 switch (gimple_call_combined_fn (last_stmt))
1290 {
1291 CASE_CFN_POW:
1292 CASE_CFN_POWI:
1293 break;
1294
1295 default:
1296 return NULL;
1297 }
1298
1299 base = gimple_call_arg (last_stmt, 0);
1300 exp = gimple_call_arg (last_stmt, 1);
1301 if (TREE_CODE (exp) != REAL_CST
1302 && TREE_CODE (exp) != INTEGER_CST)
1303 {
1304 if (flag_unsafe_math_optimizations
1305 && TREE_CODE (base) == REAL_CST
1306 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
1307 {
1308 combined_fn log_cfn;
1309 built_in_function exp_bfn;
1310 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1311 {
1312 case BUILT_IN_POW:
1313 log_cfn = CFN_BUILT_IN_LOG;
1314 exp_bfn = BUILT_IN_EXP;
1315 break;
1316 case BUILT_IN_POWF:
1317 log_cfn = CFN_BUILT_IN_LOGF;
1318 exp_bfn = BUILT_IN_EXPF;
1319 break;
1320 case BUILT_IN_POWL:
1321 log_cfn = CFN_BUILT_IN_LOGL;
1322 exp_bfn = BUILT_IN_EXPL;
1323 break;
1324 default:
1325 return NULL;
1326 }
1327 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1328 tree exp_decl = builtin_decl_implicit (exp_bfn);
1329 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1330 does that, but if C is a power of 2, we want to use
1331 exp2 (log2 (C) * x) in the non-vectorized version, but for
1332 vectorization we don't have vectorized exp2. */
1333 if (logc
1334 && TREE_CODE (logc) == REAL_CST
1335 && exp_decl
1336 && lookup_attribute ("omp declare simd",
1337 DECL_ATTRIBUTES (exp_decl)))
1338 {
1339 cgraph_node *node = cgraph_node::get_create (exp_decl);
1340 if (node->simd_clones == NULL)
1341 {
1342 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1343 || node->definition)
1344 return NULL;
1345 expand_simd_clones (node);
1346 if (node->simd_clones == NULL)
1347 return NULL;
1348 }
1349 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1350 if (!*type_out)
1351 return NULL;
1352 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1353 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1354 append_pattern_def_seq (stmt_vinfo, g);
1355 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1356 g = gimple_build_call (exp_decl, 1, def);
1357 gimple_call_set_lhs (g, res);
1358 return g;
1359 }
1360 }
1361
1362 return NULL;
1363 }
1364
1365 /* We now have a pow or powi builtin function call with a constant
1366 exponent. */
1367
1368 /* Catch squaring. */
1369 if ((tree_fits_shwi_p (exp)
1370 && tree_to_shwi (exp) == 2)
1371 || (TREE_CODE (exp) == REAL_CST
1372 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1373 {
1374 if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR,
1375 TREE_TYPE (base), type_out))
1376 return NULL;
1377
1378 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1379 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1380 return stmt;
1381 }
1382
1383 /* Catch square root. */
1384 if (TREE_CODE (exp) == REAL_CST
1385 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1386 {
1387 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1388 if (*type_out
1389 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1390 OPTIMIZE_FOR_SPEED))
1391 {
1392 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1393 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1394 gimple_call_set_lhs (stmt, var);
1395 gimple_call_set_nothrow (stmt, true);
1396 return stmt;
1397 }
1398 }
1399
1400 return NULL;
1401 }
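/* Illustrative only: pow calls with constant exponents 2.0 and 0.5, which
   the pattern above rewrites as a multiplication and an IFN_SQRT call
   respectively.  __builtin_pow is used so the snippet needs no header;
   the function name is made up for the example.  */

void
pow_example (double *x, double *y, int n)
{
  for (int i = 0; i < n; i++)
    {
      x[i] = __builtin_pow (x[i], 2.0);   /* becomes x[i] * x[i]  */
      y[i] = __builtin_pow (y[i], 0.5);   /* becomes sqrt (y[i])  */
    }
}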
1402
1403
1404 /* Function vect_recog_widen_sum_pattern
1405
1406 Try to find the following pattern:
1407
1408 type x_t;
1409 TYPE x_T, sum = init;
1410 loop:
1411 sum_0 = phi <init, sum_1>
1412 S1 x_t = *p;
1413 S2 x_T = (TYPE) x_t;
1414 S3 sum_1 = x_T + sum_0;
1415
1416 where type 'TYPE' is at least double the size of type 'type', i.e. we're
1417 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1418 a special case of a reduction computation.
1419
1420 Input:
1421
1422 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1423 when this function is called with S3, the pattern {S2,S3} will be detected.
1424
1425 Output:
1426
1427 * TYPE_OUT: The type of the output of this pattern.
1428
1429 * Return value: A new stmt that will be used to replace the sequence of
1430 stmts that constitute the pattern. In this case it will be:
1431 WIDEN_SUM <x_t, sum_0>
1432
1433 Note: The widening-sum idiom is a widening reduction pattern that is
1434 vectorized without preserving all the intermediate results. It
1435 produces only N/2 (widened) results (by summing up pairs of
1436 intermediate results) rather than all N results. Therefore, we
1437 cannot allow this pattern when we want to get all the results and in
1438 the correct order (as is the case when this computation is in an
1439 inner-loop nested in an outer-loop that is being vectorized). */
1440
1441 static gimple *
1442 vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1443 {
1444 gimple *last_stmt = stmt_vinfo->stmt;
1445 tree oprnd0, oprnd1;
1446 vec_info *vinfo = stmt_vinfo->vinfo;
1447 tree type;
1448 gimple *pattern_stmt;
1449 tree var;
1450
1451 /* Look for the following pattern
1452 DX = (TYPE) X;
1453 sum_1 = DX + sum_0;
1454 In which DX is at least double the size of X, and sum_1 has been
1455 recognized as a reduction variable.
1456 */
1457
1458 /* Starting from LAST_STMT, follow the defs of its uses in search
1459 of the above pattern. */
1460
1461 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1462 &oprnd0, &oprnd1))
1463 return NULL;
1464
1465 type = gimple_expr_type (last_stmt);
1466
1467 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1468 we know that oprnd1 is the reduction variable (defined by a loop-header
1469 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1470 Left to check that oprnd0 is defined by a cast from type 'type' to type
1471 'TYPE'. */
1472
1473 vect_unpromoted_value unprom0;
1474 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1475 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1476 return NULL;
1477
1478 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1479
1480 if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
1481 type_out))
1482 return NULL;
1483
1484 var = vect_recog_temp_ssa_var (type, NULL);
1485 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1486
1487 return pattern_stmt;
1488 }
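/* Illustrative only: summing narrow elements into a wider accumulator,
   the shape recognized above with 'type' = unsigned short and TYPE = int.
   The function name is made up for the example.  */

int
widen_sum_example (unsigned short *p, int n)
{
  int sum = 0;
  for (int i = 0; i < n; i++)
    sum += p[i];   /* S2/S3: widen, then add to the reduction.  */
  return sum;
}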
1489
1490 /* Recognize cases in which an operation is performed in one type WTYPE
1491 but could be done more efficiently in a narrower type NTYPE. For example,
1492 if we have:
1493
1494 ATYPE a; // narrower than NTYPE
1495 BTYPE b; // narrower than NTYPE
1496 WTYPE aw = (WTYPE) a;
1497 WTYPE bw = (WTYPE) b;
1498 WTYPE res = aw + bw; // only uses of aw and bw
1499
1500 then it would be more efficient to do:
1501
1502 NTYPE an = (NTYPE) a;
1503 NTYPE bn = (NTYPE) b;
1504 NTYPE resn = an + bn;
1505 WTYPE res = (WTYPE) resn;
1506
1507 Other situations include things like:
1508
1509 ATYPE a; // NTYPE or narrower
1510 WTYPE aw = (WTYPE) a;
1511 WTYPE res = aw + b;
1512
1513 when only "(NTYPE) res" is significant. In that case it's more efficient
1514 to truncate "b" and do the operation on NTYPE instead:
1515
1516 NTYPE an = (NTYPE) a;
1517 NTYPE bn = (NTYPE) b; // truncation
1518 NTYPE resn = an + bn;
1519 WTYPE res = (WTYPE) resn;
1520
1521 All users of "res" should then use "resn" instead, making the final
1522 statement dead (not marked as relevant). The final statement is still
1523 needed to maintain the type correctness of the IR.
1524
1525 vect_determine_precisions has already determined the minimum
1526 precision of the operation and the minimum precision required
1527 by users of the result. */
1528
1529 static gimple *
1530 vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1531 {
1532 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1533 if (!last_stmt)
1534 return NULL;
1535
1536 /* See whether we have found that this operation can be done on a
1537 narrower type without changing its semantics. */
1538 unsigned int new_precision = last_stmt_info->operation_precision;
1539 if (!new_precision)
1540 return NULL;
1541
1542 vec_info *vinfo = last_stmt_info->vinfo;
1543 tree lhs = gimple_assign_lhs (last_stmt);
1544 tree type = TREE_TYPE (lhs);
1545 tree_code code = gimple_assign_rhs_code (last_stmt);
1546
1547 /* Keep the first operand of a COND_EXPR as-is: only the other two
1548 operands are interesting. */
1549 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1550
1551 /* Check the operands. */
1552 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1553 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1554 unprom.quick_grow (nops);
1555 unsigned int min_precision = 0;
1556 bool single_use_p = false;
1557 for (unsigned int i = 0; i < nops; ++i)
1558 {
1559 tree op = gimple_op (last_stmt, first_op + i);
1560 if (TREE_CODE (op) == INTEGER_CST)
1561 unprom[i].set_op (op, vect_constant_def);
1562 else if (TREE_CODE (op) == SSA_NAME)
1563 {
1564 bool op_single_use_p = true;
1565 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1566 &op_single_use_p))
1567 return NULL;
1568 /* If:
1569
1570 (1) N bits of the result are needed;
1571 (2) all inputs are widened from M<N bits; and
1572 (3) one operand OP is a single-use SSA name
1573
1574 we can shift the M->N widening from OP to the output
1575 without changing the number or type of extensions involved.
1576 This then reduces the number of copies of STMT_INFO.
1577
1578 If instead of (3) more than one operand is a single-use SSA name,
1579 shifting the extension to the output is even more of a win.
1580
1581 If instead:
1582
1583 (1) N bits of the result are needed;
1584 (2) one operand OP2 is widened from M2<N bits;
1585 (3) another operand OP1 is widened from M1<M2 bits; and
1586 (4) both OP1 and OP2 are single-use
1587
1588 the choice is between:
1589
1590 (a) truncating OP2 to M1, doing the operation on M1,
1591 and then widening the result to N
1592
1593 (b) widening OP1 to M2, doing the operation on M2, and then
1594 widening the result to N
1595
1596 Both shift the M2->N widening of the inputs to the output.
1597 (a) additionally shifts the M1->M2 widening to the output;
1598 it requires fewer copies of STMT_INFO but requires an extra
1599 M2->M1 truncation.
1600
1601 Which is better will depend on the complexity and cost of
1602 STMT_INFO, which is hard to predict at this stage. However,
1603 a clear tie-breaker in favor of (b) is the fact that the
1604 truncation in (a) increases the length of the operation chain.
1605
1606 If instead of (4) only one of OP1 or OP2 is single-use,
1607 (b) is still a win over doing the operation in N bits:
1608 it still shifts the M2->N widening on the single-use operand
1609 to the output and reduces the number of STMT_INFO copies.
1610
1611 If neither operand is single-use then operating on fewer than
1612 N bits might lead to more extensions overall. Whether it does
1613 or not depends on global information about the vectorization
1614 region, and whether that's a good trade-off would again
1615 depend on the complexity and cost of the statements involved,
1616 as well as things like register pressure that are not normally
1617 modelled at this stage. We therefore ignore these cases
1618 and just optimize the clear single-use wins above.
1619
1620 Thus we take the maximum precision of the unpromoted operands
1621 and record whether any operand is single-use. */
1622 if (unprom[i].dt == vect_internal_def)
1623 {
1624 min_precision = MAX (min_precision,
1625 TYPE_PRECISION (unprom[i].type));
1626 single_use_p |= op_single_use_p;
1627 }
1628 }
1629 }
1630
1631 /* Although the operation could be done in operation_precision, we have
1632 to balance that against introducing extra truncations or extensions.
1633 Calculate the minimum precision that can be handled efficiently.
1634
1635 The loop above determined that the operation could be handled
1636 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1637 extension from the inputs to the output without introducing more
1638 instructions, and would reduce the number of instructions required
1639 for STMT_INFO itself.
1640
1641 vect_determine_precisions has also determined that the result only
1642 needs min_output_precision bits. Truncating by a factor of N times
1643 requires a tree of N - 1 instructions, so if TYPE is N times wider
1644 than min_output_precision, doing the operation in TYPE and truncating
1645 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1646 In contrast:
1647
1648 - truncating the input to a unary operation and doing the operation
1649 in the new type requires at most N - 1 + 1 = N instructions per
1650 output vector
1651
1652 - doing the same for a binary operation requires at most
1653 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1654
1655 Both unary and binary operations require fewer instructions than
1656 this if the operands were extended from a suitable truncated form.
1657 Thus there is usually nothing to lose by doing operations in
1658 min_output_precision bits, but there can be something to gain. */
1659 if (!single_use_p)
1660 min_precision = last_stmt_info->min_output_precision;
1661 else
1662 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1663
1664 /* Apply the minimum efficient precision we just calculated. */
1665 if (new_precision < min_precision)
1666 new_precision = min_precision;
1667 if (new_precision >= TYPE_PRECISION (type))
1668 return NULL;
1669
1670 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1671
1672 *type_out = get_vectype_for_scalar_type (type);
1673 if (!*type_out)
1674 return NULL;
1675
1676 /* We've found a viable pattern. Get the new type of the operation. */
1677 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1678 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1679
1680 /* If we're truncating an operation, we need to make sure that we
1681 don't introduce new undefined overflow. The codes tested here are
1682 a subset of those accepted by vect_truncatable_operation_p. */
1683 tree op_type = new_type;
1684 if (TYPE_OVERFLOW_UNDEFINED (new_type)
1685 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
1686 op_type = build_nonstandard_integer_type (new_precision, true);
1687
1688 /* We specifically don't check here whether the target supports the
1689 new operation, since it might be something that a later pattern
1690 wants to rewrite anyway. If targets have a minimum element size
1691 for some optabs, we should pattern-match smaller ops to larger ops
1692 where beneficial. */
1693 tree new_vectype = get_vectype_for_scalar_type (new_type);
1694 tree op_vectype = get_vectype_for_scalar_type (op_type);
1695 if (!new_vectype || !op_vectype)
1696 return NULL;
1697
1698 if (dump_enabled_p ())
1699 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1700 type, new_type);
1701
1702 /* Calculate the rhs operands for an operation on OP_TYPE. */
1703 tree ops[3] = {};
1704 for (unsigned int i = 1; i < first_op; ++i)
1705 ops[i - 1] = gimple_op (last_stmt, i);
1706 vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
1707 op_type, &unprom[0], op_vectype);
1708
1709 /* Use the operation to produce a result of type OP_TYPE. */
1710 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
1711 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1712 ops[0], ops[1], ops[2]);
1713 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1714
1715 if (dump_enabled_p ())
1716 dump_printf_loc (MSG_NOTE, vect_location,
1717 "created pattern stmt: %G", pattern_stmt);
1718
1719 /* Convert back to the original signedness, if OP_TYPE is different
1720 from NEW_TYPE. */
1721 if (op_type != new_type)
1722 pattern_stmt = vect_convert_output (last_stmt_info, new_type,
1723 pattern_stmt, op_vectype);
1724
1725 /* Promote the result to the original type. */
1726 pattern_stmt = vect_convert_output (last_stmt_info, type,
1727 pattern_stmt, new_vectype);
1728
1729 return pattern_stmt;
1730 }
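
/* Editor's illustration (not part of GCC): a standalone scalar sketch of
   what vect_recog_over_widening_pattern does.  Names are hypothetical and
   "unsigned char"/"unsigned short"/"unsigned int" are assumed to be
   8/16/32 bits.  */

/* Source form: both inputs are widened to 32 bits, the addition is done
   in 32 bits, and only the low 8 bits of the result are used.  */
static inline unsigned char
over_widened_add (unsigned char a, unsigned char b)
{
  unsigned int wa = a;            /* M->N widening of each input.  */
  unsigned int wb = b;
  unsigned int sum = wa + wb;     /* the over-widened operation.  */
  return (unsigned char) sum;     /* only the low bits are needed.  */
}

/* Demoted form: because addition is truncatable, a 16-bit intermediate
   gives the same low 8 bits, so each vector holds more elements and fewer
   copies of the statement are needed.  */
static inline unsigned char
demoted_add (unsigned char a, unsigned char b)
{
  unsigned short sum = (unsigned short) (a + b);
  return (unsigned char) sum;
}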
1731
1732 /* Recognize the following patterns:
1733
1734 ATYPE a; // narrower than TYPE
1735 BTYPE b; // narrower than TYPE
1736
1737 1) Multiply high with scaling
1738 TYPE res = ((TYPE) a * (TYPE) b) >> c;
1739 2) ... or also with rounding
1740 TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
1741
1742 where only the bottom half of res is used. */
1743
1744 static gimple *
1745 vect_recog_mulhs_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1746 {
1747 /* Check for a right shift. */
1748 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1749 if (!last_stmt
1750 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
1751 return NULL;
1752 vec_info *vinfo = last_stmt_info->vinfo;
1753
1754 /* Check that the shift result is wider than the users of the
1755 result need (i.e. that narrowing would be a natural choice). */
1756 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1757 unsigned int target_precision
1758 = vect_element_precision (last_stmt_info->min_output_precision);
1759 if (!INTEGRAL_TYPE_P (lhs_type)
1760 || target_precision >= TYPE_PRECISION (lhs_type))
1761 return NULL;
1762
1763 /* Look through any change in sign on the outer shift input. */
1764 vect_unpromoted_value unprom_rshift_input;
1765 tree rshift_input = vect_look_through_possible_promotion
1766 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
1767 if (!rshift_input
1768 || TYPE_PRECISION (TREE_TYPE (rshift_input))
1769 != TYPE_PRECISION (lhs_type))
1770 return NULL;
1771
1772 /* Get the definition of the shift input. */
1773 stmt_vec_info rshift_input_stmt_info
1774 = vect_get_internal_def (vinfo, rshift_input);
1775 if (!rshift_input_stmt_info)
1776 return NULL;
1777 gassign *rshift_input_stmt
1778 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
1779 if (!rshift_input_stmt)
1780 return NULL;
1781
1782 stmt_vec_info mulh_stmt_info;
1783 tree scale_term;
1784 internal_fn ifn;
1785 unsigned int expect_offset;
1786
1787 /* Check for the presence of the rounding term. */
1788 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
1789 {
1790 /* Check that the outer shift was by 1. */
1791 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
1792 return NULL;
1793
1794 /* Check that the second operand of the PLUS_EXPR is 1. */
1795 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
1796 return NULL;
1797
1798 /* Look through any change in sign on the addition input. */
1799 vect_unpromoted_value unprom_plus_input;
1800 tree plus_input = vect_look_through_possible_promotion
1801 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
1802 if (!plus_input
1803 || TYPE_PRECISION (TREE_TYPE (plus_input))
1804 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
1805 return NULL;
1806
1807 /* Get the definition of the multiply-high-scale part. */
1808 stmt_vec_info plus_input_stmt_info
1809 = vect_get_internal_def (vinfo, plus_input);
1810 if (!plus_input_stmt_info)
1811 return NULL;
1812 gassign *plus_input_stmt
1813 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
1814 if (!plus_input_stmt
1815 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
1816 return NULL;
1817
1818 /* Look through any change in sign on the scaling input. */
1819 vect_unpromoted_value unprom_scale_input;
1820 tree scale_input = vect_look_through_possible_promotion
1821 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
1822 if (!scale_input
1823 || TYPE_PRECISION (TREE_TYPE (scale_input))
1824 != TYPE_PRECISION (TREE_TYPE (plus_input)))
1825 return NULL;
1826
1827 /* Get the definition of the multiply-high part. */
1828 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
1829 if (!mulh_stmt_info)
1830 return NULL;
1831
1832 /* Get the scaling term. */
1833 scale_term = gimple_assign_rhs2 (plus_input_stmt);
1834
1835 expect_offset = target_precision + 2;
1836 ifn = IFN_MULHRS;
1837 }
1838 else
1839 {
1840 mulh_stmt_info = rshift_input_stmt_info;
1841 scale_term = gimple_assign_rhs2 (last_stmt);
1842
1843 expect_offset = target_precision + 1;
1844 ifn = IFN_MULHS;
1845 }
1846
1847 /* Check that the scaling factor is correct. */
1848 if (TREE_CODE (scale_term) != INTEGER_CST
1849 || wi::to_widest (scale_term) + expect_offset
1850 != TYPE_PRECISION (lhs_type))
1851 return NULL;
1852
1853 /* Check whether the scaling input term can be seen as two widened
1854 inputs multiplied together. */
1855 vect_unpromoted_value unprom_mult[2];
1856 tree new_type;
1857 unsigned int nops
1858 = vect_widened_op_tree (mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
1859 false, 2, unprom_mult, &new_type);
1860 if (nops != 2)
1861 return NULL;
1862
1863 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
1864
1865 /* Adjust output precision. */
1866 if (TYPE_PRECISION (new_type) < target_precision)
1867 new_type = build_nonstandard_integer_type
1868 (target_precision, TYPE_UNSIGNED (new_type));
1869
1870 /* Check for target support. */
1871 tree new_vectype = get_vectype_for_scalar_type (new_type);
1872 if (!new_vectype
1873 || !direct_internal_fn_supported_p
1874 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
1875 return NULL;
1876
1877 /* The IR requires a valid vector type for the cast result, even though
1878 it's likely to be discarded. */
1879 *type_out = get_vectype_for_scalar_type (lhs_type);
1880 if (!*type_out)
1881 return NULL;
1882
1883 /* Generate the IFN_MULHRS call. */
1884 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1885 tree new_ops[2];
1886 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
1887 unprom_mult, new_vectype);
1888 gcall *mulhrs_stmt
1889 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
1890 gimple_call_set_lhs (mulhrs_stmt, new_var);
1891 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
1892
1893 if (dump_enabled_p ())
1894 dump_printf_loc (MSG_NOTE, vect_location,
1895 "created pattern stmt: %G", mulhrs_stmt);
1896
1897 return vect_convert_output (last_stmt_info, lhs_type,
1898 mulhrs_stmt, new_vectype);
1899 }
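
/* Editor's illustration (not part of GCC): the scalar forms matched by
   vect_recog_mulhs_pattern, written out for 16-bit inputs, a 32-bit
   intermediate and a result of which only the low 16 bits are used, so
   that the scale-term check above is satisfied (shift counts 15 and 14).
   Names are hypothetical; "short"/"int" are assumed to be 16/32 bits and
   signed >> is assumed to be arithmetic, as GCC defines it.  */

/* 1) Multiply high with scaling; becomes IFN_MULHS.  */
static inline short
mulhs_q15 (short a, short b)
{
  return (short) (((int) a * (int) b) >> 15);
}

/* 2) ... with rounding as well; becomes IFN_MULHRS.  */
static inline short
mulhrs_q15 (short a, short b)
{
  return (short) (((((int) a * (int) b) >> 14) + 1) >> 1);
}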
1900
1901 /* Recognize the patterns:
1902
1903 ATYPE a; // narrower than TYPE
1904 BTYPE b; // narrower than TYPE
1905 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1906 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1907
1908 where only the bottom half of avg is used. Try to transform them into:
1909
1910 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1911 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1912
1913 followed by:
1914
1915 TYPE avg = (TYPE) avg';
1916
1917 where NTYPE is no wider than half of TYPE. Since only the bottom half
1918 of avg is used, all or part of the cast of avg' should become redundant. */
1919
1920 static gimple *
1921 vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1922 {
1923 /* Check for a shift right by one bit. */
1924 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1925 vec_info *vinfo = last_stmt_info->vinfo;
1926 if (!last_stmt
1927 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1928 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1929 return NULL;
1930
1931 /* Check that the shift result is wider than the users of the
1932 result need (i.e. that narrowing would be a natural choice). */
1933 tree lhs = gimple_assign_lhs (last_stmt);
1934 tree type = TREE_TYPE (lhs);
1935 unsigned int target_precision
1936 = vect_element_precision (last_stmt_info->min_output_precision);
1937 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1938 return NULL;
1939
1940 /* Look through any change in sign on the shift input. */
1941 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1942 vect_unpromoted_value unprom_plus;
1943 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
1944 &unprom_plus);
1945 if (!rshift_rhs
1946 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
1947 return NULL;
1948
1949 /* Get the definition of the shift input. */
1950 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1951 if (!plus_stmt_info)
1952 return NULL;
1953
1954 /* Check whether the shift input can be seen as a tree of additions on
1955 2 or 3 widened inputs.
1956
1957 Note that the pattern should be a win even if the result of one or
1958 more additions is reused elsewhere: if the pattern matches, we'd be
1959 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1960 internal_fn ifn = IFN_AVG_FLOOR;
1961 vect_unpromoted_value unprom[3];
1962 tree new_type;
1963 unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
1964 PLUS_EXPR, false, 3,
1965 unprom, &new_type);
1966 if (nops == 0)
1967 return NULL;
1968 if (nops == 3)
1969 {
1970 /* Check that one operand is 1. */
1971 unsigned int i;
1972 for (i = 0; i < 3; ++i)
1973 if (integer_onep (unprom[i].op))
1974 break;
1975 if (i == 3)
1976 return NULL;
1977 /* Throw away the 1 operand and keep the other two. */
1978 if (i < 2)
1979 unprom[i] = unprom[2];
1980 ifn = IFN_AVG_CEIL;
1981 }
1982
1983 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
1984
1985 /* We know that:
1986
1987 (a) the operation can be viewed as:
1988
1989 TYPE widened0 = (TYPE) UNPROM[0];
1990 TYPE widened1 = (TYPE) UNPROM[1];
1991 TYPE tmp1 = widened0 + widened1 {+ 1};
1992 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
1993
1994 (b) the first two statements are equivalent to:
1995
1996 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
1997 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
1998
1999 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2000 where sensible;
2001
2002 (d) all the operations can be performed correctly at twice the width of
2003 NEW_TYPE, due to the nature of the average operation; and
2004
2005 (e) users of the result of the right shift need only TARGET_PRECISION
2006 bits, where TARGET_PRECISION is no more than half of TYPE's
2007 precision.
2008
2009 Under these circumstances, the only situation in which NEW_TYPE
2010 could be narrower than TARGET_PRECISION is if widened0, widened1
2011 and an addition result are all used more than once. Thus we can
2012 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2013 as "free", whereas widening the result of the average instruction
2014 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
2015 therefore better not to go narrower than TARGET_PRECISION. */
2016 if (TYPE_PRECISION (new_type) < target_precision)
2017 new_type = build_nonstandard_integer_type (target_precision,
2018 TYPE_UNSIGNED (new_type));
2019
2020 /* Check for target support. */
2021 tree new_vectype = get_vectype_for_scalar_type (new_type);
2022 if (!new_vectype
2023 || !direct_internal_fn_supported_p (ifn, new_vectype,
2024 OPTIMIZE_FOR_SPEED))
2025 return NULL;
2026
2027 /* The IR requires a valid vector type for the cast result, even though
2028 it's likely to be discarded. */
2029 *type_out = get_vectype_for_scalar_type (type);
2030 if (!*type_out)
2031 return NULL;
2032
2033 /* Generate the IFN_AVG* call. */
2034 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2035 tree new_ops[2];
2036 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
2037 unprom, new_vectype);
2038 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2039 new_ops[1]);
2040 gimple_call_set_lhs (average_stmt, new_var);
2041 gimple_set_location (average_stmt, gimple_location (last_stmt));
2042
2043 if (dump_enabled_p ())
2044 dump_printf_loc (MSG_NOTE, vect_location,
2045 "created pattern stmt: %G", average_stmt);
2046
2047 return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
2048 }
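
/* Editor's illustration (not part of GCC): the scalar forms matched by
   vect_recog_average_pattern for 8-bit inputs.  Names are hypothetical;
   "unsigned char"/"unsigned int" are assumed to be 8/32 bits.  */

/* (1) floor average; becomes .AVG_FLOOR on the narrow vector type.  */
static inline unsigned char
avg_floor_u8 (unsigned char a, unsigned char b)
{
  return (unsigned char) (((unsigned int) a + (unsigned int) b) >> 1);
}

/* (2) ceiling average; becomes .AVG_CEIL.  The sum a + b + 1 needs at
   most 9 bits, so as point (d) above says the operation is exact at
   twice the width of the narrow type.  */
static inline unsigned char
avg_ceil_u8 (unsigned char a, unsigned char b)
{
  return (unsigned char) (((unsigned int) a + (unsigned int) b + 1) >> 1);
}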
2049
2050 /* Recognize cases in which the input to a cast is wider than its
2051 output, and the input is fed by a widening operation. Fold this
2052 by removing the unnecessary intermediate widening. E.g.:
2053
2054 unsigned char a;
2055 unsigned int b = (unsigned int) a;
2056 unsigned short c = (unsigned short) b;
2057
2058 -->
2059
2060 unsigned short c = (unsigned short) a;
2061
2062 Although this is rare in input IR, it is an expected side-effect
2063 of the over-widening pattern above.
2064
2065 This is beneficial also for integer-to-float conversions, if the
2066 widened integer has more bits than the float, and if the unwidened
2067 input doesn't. */
2068
2069 static gimple *
2070 vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
2071 {
2072 /* Check for a cast, including an integer-to-float conversion. */
2073 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2074 if (!last_stmt)
2075 return NULL;
2076 tree_code code = gimple_assign_rhs_code (last_stmt);
2077 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2078 return NULL;
2079
2080 /* Make sure that the rhs is a scalar with a natural bitsize. */
2081 tree lhs = gimple_assign_lhs (last_stmt);
2082 if (!lhs)
2083 return NULL;
2084 tree lhs_type = TREE_TYPE (lhs);
2085 scalar_mode lhs_mode;
2086 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2087 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2088 return NULL;
2089
2090 /* Check for a narrowing operation (from a vector point of view). */
2091 tree rhs = gimple_assign_rhs1 (last_stmt);
2092 tree rhs_type = TREE_TYPE (rhs);
2093 if (!INTEGRAL_TYPE_P (rhs_type)
2094 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2095 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2096 return NULL;
2097
2098 /* Try to find an unpromoted input. */
2099 vec_info *vinfo = last_stmt_info->vinfo;
2100 vect_unpromoted_value unprom;
2101 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2102 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2103 return NULL;
2104
2105 /* If the bits above RHS_TYPE matter, make sure that they're the
2106 same when extending from UNPROM as they are when extending from RHS. */
2107 if (!INTEGRAL_TYPE_P (lhs_type)
2108 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2109 return NULL;
2110
2111 /* We can get the same result by casting UNPROM directly, to avoid
2112 the unnecessary widening and narrowing. */
2113 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2114
2115 *type_out = get_vectype_for_scalar_type (lhs_type);
2116 if (!*type_out)
2117 return NULL;
2118
2119 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2120 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2121 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2122
2123 return pattern_stmt;
2124 }
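
/* Editor's illustration (not part of GCC): the fold performed by
   vect_recog_cast_forwprop_pattern on the example in the comment above.
   Names are hypothetical; "unsigned char"/"unsigned short"/"unsigned int"
   are assumed to be 8/16/32 bits.  Both functions return the same value
   for every input, so the intermediate widening can be dropped.  */
static inline unsigned short
cast_chain (unsigned char a)
{
  unsigned int b = (unsigned int) a;    /* intermediate widening.  */
  return (unsigned short) b;            /* narrowing cast.  */
}

static inline unsigned short
cast_direct (unsigned char a)
{
  return (unsigned short) a;            /* single conversion.  */
}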
2125
2126 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2127 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
2128
2129 static gimple *
2130 vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info, tree *type_out)
2131 {
2132 return vect_recog_widen_op_pattern (last_stmt_info, type_out, LSHIFT_EXPR,
2133 WIDEN_LSHIFT_EXPR, true,
2134 "vect_recog_widen_shift_pattern");
2135 }
2136
2137 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
2138
2139 type a_t, b_t, c_t;
2140
2141 S0 a_t = b_t r<< c_t;
2142
2143 Input/Output:
2144
2145 * STMT_VINFO: The stmt from which the pattern search begins,
2146 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
2147 with a sequence:
2148
2149 S1 d_t = -c_t;
2150 S2 e_t = d_t & (B - 1);
2151 S3 f_t = b_t << c_t;
2152 S4 g_t = b_t >> e_t;
2153 S0 a_t = f_t | g_t;
2154
2155 where B is the element bitsize of type.
2156
2157 Output:
2158
2159 * TYPE_OUT: The type of the output of this pattern.
2160
2161 * Return value: A new stmt that will be used to replace the rotate
2162 S0 stmt. */
2163
2164 static gimple *
2165 vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2166 {
2167 gimple *last_stmt = stmt_vinfo->stmt;
2168 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2169 gimple *pattern_stmt, *def_stmt;
2170 enum tree_code rhs_code;
2171 vec_info *vinfo = stmt_vinfo->vinfo;
2172 enum vect_def_type dt;
2173 optab optab1, optab2;
2174 edge ext_def = NULL;
2175 bool bswap16_p = false;
2176
2177 if (is_gimple_assign (last_stmt))
2178 {
2179 rhs_code = gimple_assign_rhs_code (last_stmt);
2180 switch (rhs_code)
2181 {
2182 case LROTATE_EXPR:
2183 case RROTATE_EXPR:
2184 break;
2185 default:
2186 return NULL;
2187 }
2188
2189 lhs = gimple_assign_lhs (last_stmt);
2190 oprnd0 = gimple_assign_rhs1 (last_stmt);
2191 type = TREE_TYPE (oprnd0);
2192 oprnd1 = gimple_assign_rhs2 (last_stmt);
2193 }
2194 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2195 {
2196 /* __builtin_bswap16 (x) is another form of x r>> 8.
2197 The vectorizer has bswap support, but only if the argument isn't
2198 promoted. */
2199 lhs = gimple_call_lhs (last_stmt);
2200 oprnd0 = gimple_call_arg (last_stmt, 0);
2201 type = TREE_TYPE (oprnd0);
2202 if (TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2203 || TYPE_PRECISION (type) <= 16
2204 || TREE_CODE (oprnd0) != SSA_NAME
2205 || BITS_PER_UNIT != 8
2206 || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
2207 return NULL;
2208
2209 stmt_vec_info def_stmt_info;
2210 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2211 return NULL;
2212
2213 if (dt != vect_internal_def)
2214 return NULL;
2215
2216 if (gimple_assign_cast_p (def_stmt))
2217 {
2218 def = gimple_assign_rhs1 (def_stmt);
2219 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2220 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2221 oprnd0 = def;
2222 }
2223
2224 type = TREE_TYPE (lhs);
2225 vectype = get_vectype_for_scalar_type (type);
2226 if (vectype == NULL_TREE)
2227 return NULL;
2228
2229 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2230 {
2231 /* The encoding uses one stepped pattern for each byte in the
2232 16-bit word. */
2233 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2234 for (unsigned i = 0; i < 3; ++i)
2235 for (unsigned j = 0; j < 2; ++j)
2236 elts.quick_push ((i + 1) * 2 - j - 1);
2237
2238 vec_perm_indices indices (elts, 1,
2239 TYPE_VECTOR_SUBPARTS (char_vectype));
2240 if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2241 {
2242 /* vectorizable_bswap can handle the __builtin_bswap16 if we
2243 undo the argument promotion. */
2244 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2245 {
2246 def = vect_recog_temp_ssa_var (type, NULL);
2247 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2248 append_pattern_def_seq (stmt_vinfo, def_stmt);
2249 oprnd0 = def;
2250 }
2251
2252 /* Pattern detected. */
2253 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2254
2255 *type_out = vectype;
2256
2257 /* Pattern supported. Create a stmt to be used to replace the
2258 pattern, with the unpromoted argument. */
2259 var = vect_recog_temp_ssa_var (type, NULL);
2260 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
2261 1, oprnd0);
2262 gimple_call_set_lhs (pattern_stmt, var);
2263 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
2264 gimple_call_fntype (last_stmt));
2265 return pattern_stmt;
2266 }
2267 }
2268
2269 oprnd1 = build_int_cst (integer_type_node, 8);
2270 rhs_code = LROTATE_EXPR;
2271 bswap16_p = true;
2272 }
2273 else
2274 return NULL;
2275
2276 if (TREE_CODE (oprnd0) != SSA_NAME
2277 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2278 || !INTEGRAL_TYPE_P (type)
2279 || !TYPE_UNSIGNED (type))
2280 return NULL;
2281
2282 stmt_vec_info def_stmt_info;
2283 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2284 return NULL;
2285
2286 if (dt != vect_internal_def
2287 && dt != vect_constant_def
2288 && dt != vect_external_def)
2289 return NULL;
2290
2291 vectype = get_vectype_for_scalar_type (type);
2292 if (vectype == NULL_TREE)
2293 return NULL;
2294
2295 /* If vector/vector or vector/scalar rotate is supported by the target,
2296 don't do anything here. */
2297 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2298 if (optab1
2299 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2300 {
2301 use_rotate:
2302 if (bswap16_p)
2303 {
2304 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2305 {
2306 def = vect_recog_temp_ssa_var (type, NULL);
2307 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2308 append_pattern_def_seq (stmt_vinfo, def_stmt);
2309 oprnd0 = def;
2310 }
2311
2312 /* Pattern detected. */
2313 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2314
2315 *type_out = vectype;
2316
2317 /* Pattern supported. Create a stmt to be used to replace the
2318 pattern. */
2319 var = vect_recog_temp_ssa_var (type, NULL);
2320 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
2321 oprnd1);
2322 return pattern_stmt;
2323 }
2324 return NULL;
2325 }
2326
2327 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2328 {
2329 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2330 if (optab2
2331 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2332 goto use_rotate;
2333 }
2334
2335 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2336 don't do anything here either. */
2337 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2338 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2339 if (!optab1
2340 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2341 || !optab2
2342 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2343 {
2344 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2345 return NULL;
2346 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2347 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2348 if (!optab1
2349 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2350 || !optab2
2351 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2352 return NULL;
2353 }
2354
2355 *type_out = vectype;
2356
2357 if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2358 {
2359 def = vect_recog_temp_ssa_var (type, NULL);
2360 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2361 append_pattern_def_seq (stmt_vinfo, def_stmt);
2362 oprnd0 = def;
2363 }
2364
2365 if (dt == vect_external_def
2366 && TREE_CODE (oprnd1) == SSA_NAME)
2367 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2368
2369 def = NULL_TREE;
2370 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2371 if (TREE_CODE (oprnd1) == INTEGER_CST
2372 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2373 def = oprnd1;
2374 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2375 {
2376 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2377 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2378 && TYPE_PRECISION (TREE_TYPE (rhs1))
2379 == TYPE_PRECISION (type))
2380 def = rhs1;
2381 }
2382
2383 if (def == NULL_TREE)
2384 {
2385 def = vect_recog_temp_ssa_var (type, NULL);
2386 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2387 if (ext_def)
2388 {
2389 basic_block new_bb
2390 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2391 gcc_assert (!new_bb);
2392 }
2393 else
2394 append_pattern_def_seq (stmt_vinfo, def_stmt);
2395 }
2396 stype = TREE_TYPE (def);
2397 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
2398
2399 if (TREE_CODE (def) == INTEGER_CST)
2400 {
2401 if (!tree_fits_uhwi_p (def)
2402 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2403 || integer_zerop (def))
2404 return NULL;
2405 def2 = build_int_cst (stype,
2406 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2407 }
2408 else
2409 {
2410 tree vecstype = get_vectype_for_scalar_type (stype);
2411
2412 if (vecstype == NULL_TREE)
2413 return NULL;
2414 def2 = vect_recog_temp_ssa_var (stype, NULL);
2415 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2416 if (ext_def)
2417 {
2418 basic_block new_bb
2419 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2420 gcc_assert (!new_bb);
2421 }
2422 else
2423 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2424
2425 def2 = vect_recog_temp_ssa_var (stype, NULL);
2426 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
2427 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2428 gimple_assign_lhs (def_stmt), mask);
2429 if (ext_def)
2430 {
2431 basic_block new_bb
2432 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2433 gcc_assert (!new_bb);
2434 }
2435 else
2436 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2437 }
2438
2439 var1 = vect_recog_temp_ssa_var (type, NULL);
2440 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2441 ? LSHIFT_EXPR : RSHIFT_EXPR,
2442 oprnd0, def);
2443 append_pattern_def_seq (stmt_vinfo, def_stmt);
2444
2445 var2 = vect_recog_temp_ssa_var (type, NULL);
2446 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2447 ? RSHIFT_EXPR : LSHIFT_EXPR,
2448 oprnd0, def2);
2449 append_pattern_def_seq (stmt_vinfo, def_stmt);
2450
2451 /* Pattern detected. */
2452 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2453
2454 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2455 var = vect_recog_temp_ssa_var (type, NULL);
2456 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2457
2458 return pattern_stmt;
2459 }
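
/* Editor's illustration (not part of GCC): the branch-free expansion that
   vect_recog_rotate_pattern emits (S1-S4 and S0 in the comment above),
   written as scalar C for a 32-bit unsigned type and a rotate amount in
   [0, 32).  Masking the negated amount with B - 1 keeps the second shift
   count in range even when c is 0.  The name is hypothetical.  */
static inline unsigned int
rotate_left_u32 (unsigned int b, unsigned int c)
{
  unsigned int d = -c;          /* S1 */
  unsigned int e = d & 31;      /* S2: d & (B - 1) */
  unsigned int f = b << c;      /* S3 */
  unsigned int g = b >> e;      /* S4 */
  return f | g;                 /* S0 */
}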
2460
2461 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2462 vectorized:
2463
2464 type a_t;
2465 TYPE b_T, res_T;
2466
2467 S1 a_t = ;
2468 S2 b_T = ;
2469 S3 res_T = b_T op a_t;
2470
2471 where type 'TYPE' is a type with different size than 'type',
2472 and op is <<, >> or rotate.
2473
2474 Also detect cases:
2475
2476 type a_t;
2477 TYPE b_T, c_T, res_T;
2478
2479 S0 c_T = ;
2480 S1 a_t = (type) c_T;
2481 S2 b_T = ;
2482 S3 res_T = b_T op a_t;
2483
2484 Input/Output:
2485
2486 * STMT_VINFO: The stmt from which the pattern search begins,
2487 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2488 with a shift/rotate that has the same type on both operands; in the
2489 second case just b_T op c_T, in the first case with an added cast
2490 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2491
2492 Output:
2493
2494 * TYPE_OUT: The type of the output of this pattern.
2495
2496 * Return value: A new stmt that will be used to replace the shift/rotate
2497 S3 stmt. */
2498
2499 static gimple *
2500 vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
2501 tree *type_out)
2502 {
2503 gimple *last_stmt = stmt_vinfo->stmt;
2504 tree oprnd0, oprnd1, lhs, var;
2505 gimple *pattern_stmt;
2506 enum tree_code rhs_code;
2507 vec_info *vinfo = stmt_vinfo->vinfo;
2508
2509 if (!is_gimple_assign (last_stmt))
2510 return NULL;
2511
2512 rhs_code = gimple_assign_rhs_code (last_stmt);
2513 switch (rhs_code)
2514 {
2515 case LSHIFT_EXPR:
2516 case RSHIFT_EXPR:
2517 case LROTATE_EXPR:
2518 case RROTATE_EXPR:
2519 break;
2520 default:
2521 return NULL;
2522 }
2523
2524 lhs = gimple_assign_lhs (last_stmt);
2525 oprnd0 = gimple_assign_rhs1 (last_stmt);
2526 oprnd1 = gimple_assign_rhs2 (last_stmt);
2527 if (TREE_CODE (oprnd0) != SSA_NAME
2528 || TREE_CODE (oprnd1) != SSA_NAME
2529 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2530 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2531 || TYPE_PRECISION (TREE_TYPE (lhs))
2532 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2533 return NULL;
2534
2535 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2536 if (!def_vinfo)
2537 return NULL;
2538
2539 *type_out = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2540 if (*type_out == NULL_TREE)
2541 return NULL;
2542
2543 tree def = NULL_TREE;
2544 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2545 if (def_stmt && gimple_assign_cast_p (def_stmt))
2546 {
2547 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2548 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2549 && TYPE_PRECISION (TREE_TYPE (rhs1))
2550 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2551 {
2552 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2553 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2554 def = rhs1;
2555 else
2556 {
2557 tree mask
2558 = build_low_bits_mask (TREE_TYPE (rhs1),
2559 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2560 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2561 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2562 tree vecstype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2563 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2564 }
2565 }
2566 }
2567
2568 if (def == NULL_TREE)
2569 {
2570 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2571 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2572 append_pattern_def_seq (stmt_vinfo, def_stmt);
2573 }
2574
2575 /* Pattern detected. */
2576 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2577
2578 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2579 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2580 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2581
2582 return pattern_stmt;
2583 }
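
/* Editor's illustration (not part of GCC): the mixed-type shift shape that
   vect_recog_vector_vector_shift_pattern targets.  The shift amount a_t is
   narrower than the shifted value b_T, so the pattern adds a cast (or
   reuses the pre-cast value) to give both vector operands the same element
   type.  Names are hypothetical; "unsigned char"/"unsigned int" are
   assumed to be 8/32 bits.  */
static inline unsigned int
shift_by_narrow_amount (unsigned int b, unsigned char a)
{
  /* S3: res_T = b_T >> a_t is rewritten as b >> (unsigned int) a.  */
  return b >> (unsigned int) a;
}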
2584
2585 /* Return true iff the target has a vector optab implementing the operation
2586 CODE on type VECTYPE. */
2587
2588 static bool
2589 target_has_vecop_for_code (tree_code code, tree vectype)
2590 {
2591 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2592 return voptab
2593 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2594 }
2595
2596 /* Verify that the target has optabs of VECTYPE to perform all the steps
2597 needed by the multiplication-by-immediate synthesis algorithm described by
2598 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2599 present. Return true iff the target supports all the steps. */
2600
2601 static bool
2602 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2603 tree vectype, bool synth_shift_p)
2604 {
2605 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2606 return false;
2607
2608 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2609 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2610
2611 if (var == negate_variant
2612 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2613 return false;
2614
2615 /* If we must synthesize shifts with additions make sure that vector
2616 addition is available. */
2617 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2618 return false;
2619
2620 for (int i = 1; i < alg->ops; i++)
2621 {
2622 switch (alg->op[i])
2623 {
2624 case alg_shift:
2625 break;
2626 case alg_add_t_m2:
2627 case alg_add_t2_m:
2628 case alg_add_factor:
2629 if (!supports_vplus)
2630 return false;
2631 break;
2632 case alg_sub_t_m2:
2633 case alg_sub_t2_m:
2634 case alg_sub_factor:
2635 if (!supports_vminus)
2636 return false;
2637 break;
2638 case alg_unknown:
2639 case alg_m:
2640 case alg_zero:
2641 case alg_impossible:
2642 return false;
2643 default:
2644 gcc_unreachable ();
2645 }
2646 }
2647
2648 return true;
2649 }
2650
2651 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2652 putting the final result in DEST. Append all statements but the last into
2653 VINFO. Return the last statement. */
2654
2655 static gimple *
2656 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2657 stmt_vec_info vinfo)
2658 {
2659 HOST_WIDE_INT i;
2660 tree itype = TREE_TYPE (op);
2661 tree prev_res = op;
2662 gcc_assert (amnt >= 0);
2663 for (i = 0; i < amnt; i++)
2664 {
2665 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2666 : dest;
2667 gimple *stmt
2668 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2669 prev_res = tmp_var;
2670 if (i < amnt - 1)
2671 append_pattern_def_seq (vinfo, stmt);
2672 else
2673 return stmt;
2674 }
2675 gcc_unreachable ();
2676 return NULL;
2677 }
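
/* Editor's illustration (not part of GCC): what synth_lshift_by_additions
   builds for AMNT == 3 -- each self-addition doubles the value, so three
   of them compute OP << 3 without a vector shift.  The name is
   hypothetical.  */
static inline unsigned int
lshift3_by_additions (unsigned int op)
{
  unsigned int t1 = op + op;    /* op << 1 */
  unsigned int t2 = t1 + t1;    /* op << 2 */
  return t2 + t2;               /* op << 3 */
}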
2678
2679 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2680 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2681 the process if necessary. Append the resulting assignment statements
2682 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2683 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2684 left shifts using additions. */
2685
2686 static tree
2687 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2688 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2689 {
2690 if (integer_zerop (op2)
2691 && (code == LSHIFT_EXPR
2692 || code == PLUS_EXPR))
2693 {
2694 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2695 return op1;
2696 }
2697
2698 gimple *stmt;
2699 tree itype = TREE_TYPE (op1);
2700 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2701
2702 if (code == LSHIFT_EXPR
2703 && synth_shift_p)
2704 {
2705 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2706 stmt_vinfo);
2707 append_pattern_def_seq (stmt_vinfo, stmt);
2708 return tmp_var;
2709 }
2710
2711 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2712 append_pattern_def_seq (stmt_vinfo, stmt);
2713 return tmp_var;
2714 }
2715
2716 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2717 and simple arithmetic operations to be vectorized. Record the statements
2718 produced in STMT_VINFO and return the last statement in the sequence or
2719 NULL if it's not possible to synthesize such a multiplication.
2720 This function mirrors the behavior of expand_mult_const in expmed.c but
2721 works on tree-ssa form. */
2722
2723 static gimple *
2724 vect_synth_mult_by_constant (tree op, tree val,
2725 stmt_vec_info stmt_vinfo)
2726 {
2727 tree itype = TREE_TYPE (op);
2728 machine_mode mode = TYPE_MODE (itype);
2729 struct algorithm alg;
2730 mult_variant variant;
2731 if (!tree_fits_shwi_p (val))
2732 return NULL;
2733
2734 /* Multiplication synthesis by shifts, adds and subs can introduce
2735 signed overflow where the original operation didn't. Perform the
2736 operations on an unsigned type and cast back to avoid this.
2737 In the future we may want to relax this for synthesis algorithms
2738 that we can prove do not cause unexpected overflow. */
2739 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2740
2741 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2742
2743 /* Targets that don't support vector shifts but support vector additions
2744 can synthesize shifts that way. */
2745 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2746
2747 HOST_WIDE_INT hwval = tree_to_shwi (val);
2748 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2749 The vectorizer's benefit analysis will decide whether it's beneficial
2750 to do this. */
2751 bool possible = choose_mult_variant (mode, hwval, &alg,
2752 &variant, MAX_COST);
2753 if (!possible)
2754 return NULL;
2755
2756 tree vectype = get_vectype_for_scalar_type (multtype);
2757
2758 if (!vectype
2759 || !target_supports_mult_synth_alg (&alg, variant,
2760 vectype, synth_shift_p))
2761 return NULL;
2762
2763 tree accumulator;
2764
2765 /* STMT tracks the last statement built; the sequence is populated below. */
2766 gimple *stmt = NULL;
2767
2768 if (cast_to_unsigned_p)
2769 {
2770 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2771 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2772 append_pattern_def_seq (stmt_vinfo, stmt);
2773 op = tmp_op;
2774 }
2775
2776 if (alg.op[0] == alg_zero)
2777 accumulator = build_int_cst (multtype, 0);
2778 else
2779 accumulator = op;
2780
2781 bool needs_fixup = (variant == negate_variant)
2782 || (variant == add_variant);
2783
2784 for (int i = 1; i < alg.ops; i++)
2785 {
2786 tree shft_log = build_int_cst (multtype, alg.log[i]);
2787 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2788 tree tmp_var = NULL_TREE;
2789
2790 switch (alg.op[i])
2791 {
2792 case alg_shift:
2793 if (synth_shift_p)
2794 stmt
2795 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2796 stmt_vinfo);
2797 else
2798 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2799 shft_log);
2800 break;
2801 case alg_add_t_m2:
2802 tmp_var
2803 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2804 stmt_vinfo, synth_shift_p);
2805 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2806 tmp_var);
2807 break;
2808 case alg_sub_t_m2:
2809 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2810 shft_log, stmt_vinfo,
2811 synth_shift_p);
2812 /* In some algorithms the first step involves zeroing the
2813 accumulator. If subtracting from such an accumulator
2814 just emit the negation directly. */
2815 if (integer_zerop (accumulator))
2816 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2817 else
2818 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2819 tmp_var);
2820 break;
2821 case alg_add_t2_m:
2822 tmp_var
2823 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2824 stmt_vinfo, synth_shift_p);
2825 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2826 break;
2827 case alg_sub_t2_m:
2828 tmp_var
2829 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2830 stmt_vinfo, synth_shift_p);
2831 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2832 break;
2833 case alg_add_factor:
2834 tmp_var
2835 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2836 stmt_vinfo, synth_shift_p);
2837 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2838 tmp_var);
2839 break;
2840 case alg_sub_factor:
2841 tmp_var
2842 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2843 stmt_vinfo, synth_shift_p);
2844 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2845 accumulator);
2846 break;
2847 default:
2848 gcc_unreachable ();
2849 }
2850 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2851 but rather return it directly. */
2852
2853 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2854 append_pattern_def_seq (stmt_vinfo, stmt);
2855 accumulator = accum_tmp;
2856 }
2857 if (variant == negate_variant)
2858 {
2859 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2860 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2861 accumulator = accum_tmp;
2862 if (cast_to_unsigned_p)
2863 append_pattern_def_seq (stmt_vinfo, stmt);
2864 }
2865 else if (variant == add_variant)
2866 {
2867 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2868 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2869 accumulator = accum_tmp;
2870 if (cast_to_unsigned_p)
2871 append_pattern_def_seq (stmt_vinfo, stmt);
2872 }
2873 /* Move back to a signed type if needed. */
2874 if (cast_to_unsigned_p)
2875 {
2876 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2877 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2878 }
2879
2880 return stmt;
2881 }
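
/* Editor's illustration (not part of GCC): the kind of shift-and-add
   sequence vect_synth_mult_by_constant produces, shown here for
   multiplications by 9 and by 15.  Names are hypothetical; using an
   unsigned type also sidesteps the signed-overflow concern handled by
   cast_to_unsigned_p above.  */
static inline unsigned int
mult_by_9 (unsigned int x)
{
  return (x << 3) + x;          /* 8*x + x */
}

static inline unsigned int
mult_by_15 (unsigned int x)
{
  return (x << 4) - x;          /* 16*x - x */
}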
2882
2883 /* Detect multiplication by a constant and convert it into a sequence of
2884 shifts, additions, subtractions and negations. We reuse the
2885 choose_mult_variant algorithms from expmed.c.
2886
2887 Input/Output:
2888
2889 STMT_VINFO: The stmt from which the pattern search begins,
2890 i.e. the mult stmt.
2891
2892 Output:
2893
2894 * TYPE_OUT: The type of the output of this pattern.
2895
2896 * Return value: A new stmt that will be used to replace
2897 the multiplication. */
2898
2899 static gimple *
2900 vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2901 {
2902 gimple *last_stmt = stmt_vinfo->stmt;
2903 tree oprnd0, oprnd1, vectype, itype;
2904 gimple *pattern_stmt;
2905
2906 if (!is_gimple_assign (last_stmt))
2907 return NULL;
2908
2909 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2910 return NULL;
2911
2912 oprnd0 = gimple_assign_rhs1 (last_stmt);
2913 oprnd1 = gimple_assign_rhs2 (last_stmt);
2914 itype = TREE_TYPE (oprnd0);
2915
2916 if (TREE_CODE (oprnd0) != SSA_NAME
2917 || TREE_CODE (oprnd1) != INTEGER_CST
2918 || !INTEGRAL_TYPE_P (itype)
2919 || !type_has_mode_precision_p (itype))
2920 return NULL;
2921
2922 vectype = get_vectype_for_scalar_type (itype);
2923 if (vectype == NULL_TREE)
2924 return NULL;
2925
2926 /* If the target can handle vectorized multiplication natively,
2927 don't attempt to optimize this. */
2928 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2929 if (mul_optab != unknown_optab)
2930 {
2931 machine_mode vec_mode = TYPE_MODE (vectype);
2932 int icode = (int) optab_handler (mul_optab, vec_mode);
2933 if (icode != CODE_FOR_nothing)
2934 return NULL;
2935 }
2936
2937 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2938 if (!pattern_stmt)
2939 return NULL;
2940
2941 /* Pattern detected. */
2942 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
2943
2944 *type_out = vectype;
2945
2946 return pattern_stmt;
2947 }
2948
2949 /* Detect a signed division by a constant that wouldn't be
2950 otherwise vectorized:
2951
2952 type a_t, b_t;
2953
2954 S1 a_t = b_t / N;
2955
2956 where type 'type' is an integral type and N is a constant.
2957
2958 Similarly handle modulo by a constant:
2959
2960 S4 a_t = b_t % N;
2961
2962 Input/Output:
2963
2964 * STMT_VINFO: The stmt from which the pattern search begins,
2965 i.e. the division stmt. If N is a power of two constant and the
2966 type is signed, S1 is replaced by:
2967 S3 y_t = b_t < 0 ? N - 1 : 0;
2968 S2 x_t = b_t + y_t;
2969 S1' a_t = x_t >> log2 (N);
2970
2971 If N is a power of two constant and the type is signed, S4 is
2972 replaced by (where *_T temporaries have unsigned type):
2973 S9 y_T = b_t < 0 ? -1U : 0U;
2974 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2975 S7 z_t = (type) z_T;
2976 S6 w_t = b_t + z_t;
2977 S5 x_t = w_t & (N - 1);
2978 S4' a_t = x_t - z_t;
2979
2980 Output:
2981
2982 * TYPE_OUT: The type of the output of this pattern.
2983
2984 * Return value: A new stmt that will be used to replace the division
2985 S1 or modulo S4 stmt. */
2986
2987 static gimple *
2988 vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2989 {
2990 gimple *last_stmt = stmt_vinfo->stmt;
2991 tree oprnd0, oprnd1, vectype, itype, cond;
2992 gimple *pattern_stmt, *def_stmt;
2993 enum tree_code rhs_code;
2994 optab optab;
2995 tree q;
2996 int dummy_int, prec;
2997
2998 if (!is_gimple_assign (last_stmt))
2999 return NULL;
3000
3001 rhs_code = gimple_assign_rhs_code (last_stmt);
3002 switch (rhs_code)
3003 {
3004 case TRUNC_DIV_EXPR:
3005 case EXACT_DIV_EXPR:
3006 case TRUNC_MOD_EXPR:
3007 break;
3008 default:
3009 return NULL;
3010 }
3011
3012 oprnd0 = gimple_assign_rhs1 (last_stmt);
3013 oprnd1 = gimple_assign_rhs2 (last_stmt);
3014 itype = TREE_TYPE (oprnd0);
3015 if (TREE_CODE (oprnd0) != SSA_NAME
3016 || TREE_CODE (oprnd1) != INTEGER_CST
3017 || TREE_CODE (itype) != INTEGER_TYPE
3018 || !type_has_mode_precision_p (itype))
3019 return NULL;
3020
3021 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3022 vectype = get_vectype_for_scalar_type (itype);
3023 if (vectype == NULL_TREE)
3024 return NULL;
3025
3026 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3027 {
3028 /* If the target can handle vectorized division or modulo natively,
3029 don't attempt to optimize this, since native division is likely
3030 to give smaller code. */
3031 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3032 if (optab != unknown_optab)
3033 {
3034 machine_mode vec_mode = TYPE_MODE (vectype);
3035 int icode = (int) optab_handler (optab, vec_mode);
3036 if (icode != CODE_FOR_nothing)
3037 return NULL;
3038 }
3039 }
3040
3041 prec = TYPE_PRECISION (itype);
3042 if (integer_pow2p (oprnd1))
3043 {
3044 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3045 return NULL;
3046
3047 /* Pattern detected. */
3048 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3049
3050 *type_out = vectype;
3051
3052 /* Check if the target supports this internal function. */
3053 internal_fn ifn = IFN_DIV_POW2;
3054 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3055 {
3056 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3057
3058 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3059 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3060 gimple_call_set_lhs (div_stmt, var_div);
3061
3062 if (rhs_code == TRUNC_MOD_EXPR)
3063 {
3064 append_pattern_def_seq (stmt_vinfo, div_stmt);
3065 def_stmt
3066 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3067 LSHIFT_EXPR, var_div, shift);
3068 append_pattern_def_seq (stmt_vinfo, def_stmt);
3069 pattern_stmt
3070 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3071 MINUS_EXPR, oprnd0,
3072 gimple_assign_lhs (def_stmt));
3073 }
3074 else
3075 pattern_stmt = div_stmt;
3076 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3077
3078 return pattern_stmt;
3079 }
3080
3081 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3082 build_int_cst (itype, 0));
3083 if (rhs_code == TRUNC_DIV_EXPR
3084 || rhs_code == EXACT_DIV_EXPR)
3085 {
3086 tree var = vect_recog_temp_ssa_var (itype, NULL);
3087 tree shift;
3088 def_stmt
3089 = gimple_build_assign (var, COND_EXPR, cond,
3090 fold_build2 (MINUS_EXPR, itype, oprnd1,
3091 build_int_cst (itype, 1)),
3092 build_int_cst (itype, 0));
3093 append_pattern_def_seq (stmt_vinfo, def_stmt);
3094 var = vect_recog_temp_ssa_var (itype, NULL);
3095 def_stmt
3096 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3097 gimple_assign_lhs (def_stmt));
3098 append_pattern_def_seq (stmt_vinfo, def_stmt);
3099
3100 shift = build_int_cst (itype, tree_log2 (oprnd1));
3101 pattern_stmt
3102 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3103 RSHIFT_EXPR, var, shift);
3104 }
3105 else
3106 {
3107 tree signmask;
3108 if (compare_tree_int (oprnd1, 2) == 0)
3109 {
3110 signmask = vect_recog_temp_ssa_var (itype, NULL);
3111 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3112 build_int_cst (itype, 1),
3113 build_int_cst (itype, 0));
3114 append_pattern_def_seq (stmt_vinfo, def_stmt);
3115 }
3116 else
3117 {
3118 tree utype
3119 = build_nonstandard_integer_type (prec, 1);
3120 tree vecutype = get_vectype_for_scalar_type (utype);
3121 tree shift
3122 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3123 - tree_log2 (oprnd1));
3124 tree var = vect_recog_temp_ssa_var (utype, NULL);
3125
3126 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3127 build_int_cst (utype, -1),
3128 build_int_cst (utype, 0));
3129 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
3130 var = vect_recog_temp_ssa_var (utype, NULL);
3131 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3132 gimple_assign_lhs (def_stmt),
3133 shift);
3134 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
3135 signmask = vect_recog_temp_ssa_var (itype, NULL);
3136 def_stmt
3137 = gimple_build_assign (signmask, NOP_EXPR, var);
3138 append_pattern_def_seq (stmt_vinfo, def_stmt);
3139 }
3140 def_stmt
3141 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3142 PLUS_EXPR, oprnd0, signmask);
3143 append_pattern_def_seq (stmt_vinfo, def_stmt);
3144 def_stmt
3145 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3146 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3147 fold_build2 (MINUS_EXPR, itype, oprnd1,
3148 build_int_cst (itype, 1)));
3149 append_pattern_def_seq (stmt_vinfo, def_stmt);
3150
3151 pattern_stmt
3152 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3153 MINUS_EXPR, gimple_assign_lhs (def_stmt),
3154 signmask);
3155 }
3156
3157 return pattern_stmt;
3158 }
3159
3160 if (prec > HOST_BITS_PER_WIDE_INT
3161 || integer_zerop (oprnd1))
3162 return NULL;
3163
3164 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3165 return NULL;
3166
3167 if (TYPE_UNSIGNED (itype))
3168 {
3169 unsigned HOST_WIDE_INT mh, ml;
3170 int pre_shift, post_shift;
3171 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3172 & GET_MODE_MASK (itype_mode));
3173 tree t1, t2, t3, t4;
3174
3175 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3176 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
3177 return NULL;
3178
3179 /* Find a suitable multiplier and right shift count
3180 instead of multiplying with D. */
3181 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3182
3183 /* If the suggested multiplier is more than SIZE bits, we can do better
3184 for even divisors, using an initial right shift. */
3185 if (mh != 0 && (d & 1) == 0)
3186 {
3187 pre_shift = ctz_or_zero (d);
3188 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3189 &ml, &post_shift, &dummy_int);
3190 gcc_assert (!mh);
3191 }
3192 else
3193 pre_shift = 0;
3194
3195 if (mh != 0)
3196 {
3197 if (post_shift - 1 >= prec)
3198 return NULL;
3199
3200 /* t1 = oprnd0 h* ml;
3201 t2 = oprnd0 - t1;
3202 t3 = t2 >> 1;
3203 t4 = t1 + t3;
3204 q = t4 >> (post_shift - 1); */
3205 t1 = vect_recog_temp_ssa_var (itype, NULL);
3206 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3207 build_int_cst (itype, ml));
3208 append_pattern_def_seq (stmt_vinfo, def_stmt);
3209
3210 t2 = vect_recog_temp_ssa_var (itype, NULL);
3211 def_stmt
3212 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3213 append_pattern_def_seq (stmt_vinfo, def_stmt);
3214
3215 t3 = vect_recog_temp_ssa_var (itype, NULL);
3216 def_stmt
3217 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3218 append_pattern_def_seq (stmt_vinfo, def_stmt);
3219
3220 t4 = vect_recog_temp_ssa_var (itype, NULL);
3221 def_stmt
3222 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3223
3224 if (post_shift != 1)
3225 {
3226 append_pattern_def_seq (stmt_vinfo, def_stmt);
3227
3228 q = vect_recog_temp_ssa_var (itype, NULL);
3229 pattern_stmt
3230 = gimple_build_assign (q, RSHIFT_EXPR, t4,
3231 build_int_cst (itype, post_shift - 1));
3232 }
3233 else
3234 {
3235 q = t4;
3236 pattern_stmt = def_stmt;
3237 }
3238 }
3239 else
3240 {
3241 if (pre_shift >= prec || post_shift >= prec)
3242 return NULL;
3243
3244 /* t1 = oprnd0 >> pre_shift;
3245 t2 = t1 h* ml;
3246 q = t2 >> post_shift; */
3247 if (pre_shift)
3248 {
3249 t1 = vect_recog_temp_ssa_var (itype, NULL);
3250 def_stmt
3251 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
3252 build_int_cst (NULL, pre_shift));
3253 append_pattern_def_seq (stmt_vinfo, def_stmt);
3254 }
3255 else
3256 t1 = oprnd0;
3257
3258 t2 = vect_recog_temp_ssa_var (itype, NULL);
3259 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
3260 build_int_cst (itype, ml));
3261
3262 if (post_shift)
3263 {
3264 append_pattern_def_seq (stmt_vinfo, def_stmt);
3265
3266 q = vect_recog_temp_ssa_var (itype, NULL);
3267 def_stmt
3268 = gimple_build_assign (q, RSHIFT_EXPR, t2,
3269 build_int_cst (itype, post_shift));
3270 }
3271 else
3272 q = t2;
3273
3274 pattern_stmt = def_stmt;
3275 }
3276 }
3277 else
3278 {
3279 unsigned HOST_WIDE_INT ml;
3280 int post_shift;
3281 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
3282 unsigned HOST_WIDE_INT abs_d;
3283 bool add = false;
3284 tree t1, t2, t3, t4;
3285
3286 /* Give up for -1. */
3287 if (d == -1)
3288 return NULL;
3289
3290 /* Since d might be INT_MIN, we have to cast to
3291 unsigned HOST_WIDE_INT before negating to avoid
3292 undefined signed overflow. */
3293 abs_d = (d >= 0
3294 ? (unsigned HOST_WIDE_INT) d
3295 : - (unsigned HOST_WIDE_INT) d);
3296
3297 /* n rem d = n rem -d */
3298 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3299 {
3300 d = abs_d;
3301 oprnd1 = build_int_cst (itype, abs_d);
3302 }
3303 else if (HOST_BITS_PER_WIDE_INT >= prec
3304 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
3305 /* This case is not handled correctly below. */
3306 return NULL;
3307
3308 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
3309 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3310 {
3311 add = true;
3312 ml |= HOST_WIDE_INT_M1U << (prec - 1);
3313 }
3314 if (post_shift >= prec)
3315 return NULL;
3316
3317 /* t1 = oprnd0 h* ml; */
3318 t1 = vect_recog_temp_ssa_var (itype, NULL);
3319 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3320 build_int_cst (itype, ml));
3321
3322 if (add)
3323 {
3324 /* t2 = t1 + oprnd0; */
3325 append_pattern_def_seq (stmt_vinfo, def_stmt);
3326 t2 = vect_recog_temp_ssa_var (itype, NULL);
3327 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3328 }
3329 else
3330 t2 = t1;
3331
3332 if (post_shift)
3333 {
3334 /* t3 = t2 >> post_shift; */
3335 append_pattern_def_seq (stmt_vinfo, def_stmt);
3336 t3 = vect_recog_temp_ssa_var (itype, NULL);
3337 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3338 build_int_cst (itype, post_shift));
3339 }
3340 else
3341 t3 = t2;
3342
3343 wide_int oprnd0_min, oprnd0_max;
3344 int msb = 1;
3345 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
3346 {
3347 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
3348 msb = 0;
3349 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
3350 msb = -1;
3351 }
3352
3353 if (msb == 0 && d >= 0)
3354 {
3355 /* q = t3; */
3356 q = t3;
3357 pattern_stmt = def_stmt;
3358 }
3359 else
3360 {
3361 /* t4 = oprnd0 >> (prec - 1);
3362 or if we know from VRP that oprnd0 >= 0
3363 t4 = 0;
3364 or if we know from VRP that oprnd0 < 0
3365 t4 = -1; */
3366 append_pattern_def_seq (stmt_vinfo, def_stmt);
3367 t4 = vect_recog_temp_ssa_var (itype, NULL);
3368 if (msb != 1)
3369 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3370 build_int_cst (itype, msb));
3371 else
3372 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3373 build_int_cst (itype, prec - 1));
3374 append_pattern_def_seq (stmt_vinfo, def_stmt);
3375
3376 /* q = t3 - t4; or q = t4 - t3; */
3377 q = vect_recog_temp_ssa_var (itype, NULL);
3378 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3379 d < 0 ? t3 : t4);
3380 }
3381 }
3382
3383 if (rhs_code == TRUNC_MOD_EXPR)
3384 {
3385 tree r, t1;
3386
3387 /* We divided. Now finish by:
3388 t1 = q * oprnd1;
3389 r = oprnd0 - t1; */
3390 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3391
3392 t1 = vect_recog_temp_ssa_var (itype, NULL);
3393 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3394 append_pattern_def_seq (stmt_vinfo, def_stmt);
3395
3396 r = vect_recog_temp_ssa_var (itype, NULL);
3397 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3398 }
3399
3400 /* Pattern detected. */
3401 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3402
3403 *type_out = vectype;
3404 return pattern_stmt;
3405 }
3406
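/* As an illustrative sketch of the transform above (a hypothetical
   example, not taken from any testcase): for a signed 32-bit division
   q = x / 3, choose_multiplier yields ml = 0x55555556 with post_shift = 0,
   so the emitted pattern sequence is roughly

     t1 = x h* 0x55555556;   <- MULT_HIGHPART_EXPR
     t3 = t1;                <- post_shift is 0, so no extra shift
     t4 = x >> 31;           <- or 0 / -1 if VRP knows the sign of x
     q  = t3 - t4;

   and for x % 3 the tail below additionally emits

     t  = q * 3;
     r  = x - t;  */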
3407 /* Function vect_recog_mixed_size_cond_pattern
3408
3409 Try to find the following pattern:
3410
3411 type x_t, y_t;
3412 TYPE a_T, b_T, c_T;
3413 loop:
3414 S1 a_T = x_t CMP y_t ? b_T : c_T;
3415
3416 where type 'TYPE' is an integral type which has different size
3417 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3418 than 'type', the constants need to fit into an integer type
3419 with the same width as 'type') or results of conversion from 'type'.
3420
3421 Input:
3422
3423 * STMT_VINFO: The stmt from which the pattern search begins.
3424
3425 Output:
3426
3427 * TYPE_OUT: The type of the output of this pattern.
3428
3429 * Return value: A new stmt that will be used to replace the pattern.
3430 Additionally a def_stmt is added.
3431
3432 a_it = x_t CMP y_t ? b_it : c_it;
3433 a_T = (TYPE) a_it; */
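/* For illustration only (a hypothetical loop body, not from any testcase):

     short x[N], y[N];
     int a[N];
     ...
     a[i] = x[i] < y[i] ? -1 : 31;

   Here the comparison is done on 'short' while the result is 'int', and
   both constants fit in 'short', so the COND_EXPR is performed in a
   short-sized integer type and the result is then widened to 'int'.  */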
3434
3435 static gimple *
3436 vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3437 {
3438 gimple *last_stmt = stmt_vinfo->stmt;
3439 tree cond_expr, then_clause, else_clause;
3440 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3441 gimple *pattern_stmt, *def_stmt;
3442 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3443 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3444 bool promotion;
3445 tree comp_scalar_type;
3446
3447 if (!is_gimple_assign (last_stmt)
3448 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3449 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3450 return NULL;
3451
3452 cond_expr = gimple_assign_rhs1 (last_stmt);
3453 then_clause = gimple_assign_rhs2 (last_stmt);
3454 else_clause = gimple_assign_rhs3 (last_stmt);
3455
3456 if (!COMPARISON_CLASS_P (cond_expr))
3457 return NULL;
3458
3459 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3460 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3461 if (comp_vectype == NULL_TREE)
3462 return NULL;
3463
3464 type = gimple_expr_type (last_stmt);
3465 if (types_compatible_p (type, comp_scalar_type)
3466 || ((TREE_CODE (then_clause) != INTEGER_CST
3467 || TREE_CODE (else_clause) != INTEGER_CST)
3468 && !INTEGRAL_TYPE_P (comp_scalar_type))
3469 || !INTEGRAL_TYPE_P (type))
3470 return NULL;
3471
3472 if ((TREE_CODE (then_clause) != INTEGER_CST
3473 && !type_conversion_p (then_clause, stmt_vinfo, false, &orig_type0,
3474 &def_stmt0, &promotion))
3475 || (TREE_CODE (else_clause) != INTEGER_CST
3476 && !type_conversion_p (else_clause, stmt_vinfo, false, &orig_type1,
3477 &def_stmt1, &promotion)))
3478 return NULL;
3479
3480 if (orig_type0 && orig_type1
3481 && !types_compatible_p (orig_type0, orig_type1))
3482 return NULL;
3483
3484 if (orig_type0)
3485 {
3486 if (!types_compatible_p (orig_type0, comp_scalar_type))
3487 return NULL;
3488 then_clause = gimple_assign_rhs1 (def_stmt0);
3489 itype = orig_type0;
3490 }
3491
3492 if (orig_type1)
3493 {
3494 if (!types_compatible_p (orig_type1, comp_scalar_type))
3495 return NULL;
3496 else_clause = gimple_assign_rhs1 (def_stmt1);
3497 itype = orig_type1;
3498 }
3499
3500
3501 HOST_WIDE_INT cmp_mode_size
3502 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3503
3504 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3505 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3506 return NULL;
3507
3508 vectype = get_vectype_for_scalar_type (type);
3509 if (vectype == NULL_TREE)
3510 return NULL;
3511
3512 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3513 return NULL;
3514
3515 if (itype == NULL_TREE)
3516 itype = build_nonstandard_integer_type (cmp_mode_size,
3517 TYPE_UNSIGNED (type));
3518
3519 if (itype == NULL_TREE
3520 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3521 return NULL;
3522
3523 vecitype = get_vectype_for_scalar_type (itype);
3524 if (vecitype == NULL_TREE)
3525 return NULL;
3526
3527 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3528 return NULL;
3529
3530 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3531 {
3532 if ((TREE_CODE (then_clause) == INTEGER_CST
3533 && !int_fits_type_p (then_clause, itype))
3534 || (TREE_CODE (else_clause) == INTEGER_CST
3535 && !int_fits_type_p (else_clause, itype)))
3536 return NULL;
3537 }
3538
3539 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3540 COND_EXPR, unshare_expr (cond_expr),
3541 fold_convert (itype, then_clause),
3542 fold_convert (itype, else_clause));
3543 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3544 NOP_EXPR, gimple_assign_lhs (def_stmt));
3545
3546 append_pattern_def_seq (stmt_vinfo, def_stmt, vecitype);
3547 *type_out = vectype;
3548
3549 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3550
3551 return pattern_stmt;
3552 }
3553
3554
3555 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3556 true if bool VAR can and should be optimized that way. Assume it shouldn't
3557 in case it's a result of a comparison which can be directly vectorized into
3558 a vector comparison. Fills in STMTS with all stmts visited during the
3559 walk. */
3560
3561 static bool
3562 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3563 {
3564 tree rhs1;
3565 enum tree_code rhs_code;
3566
3567 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3568 if (!def_stmt_info)
3569 return false;
3570
3571 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3572 if (!def_stmt)
3573 return false;
3574
3575 if (stmts.contains (def_stmt))
3576 return true;
3577
3578 rhs1 = gimple_assign_rhs1 (def_stmt);
3579 rhs_code = gimple_assign_rhs_code (def_stmt);
3580 switch (rhs_code)
3581 {
3582 case SSA_NAME:
3583 if (! check_bool_pattern (rhs1, vinfo, stmts))
3584 return false;
3585 break;
3586
3587 CASE_CONVERT:
3588 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3589 return false;
3590 if (! check_bool_pattern (rhs1, vinfo, stmts))
3591 return false;
3592 break;
3593
3594 case BIT_NOT_EXPR:
3595 if (! check_bool_pattern (rhs1, vinfo, stmts))
3596 return false;
3597 break;
3598
3599 case BIT_AND_EXPR:
3600 case BIT_IOR_EXPR:
3601 case BIT_XOR_EXPR:
3602 if (! check_bool_pattern (rhs1, vinfo, stmts)
3603 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3604 return false;
3605 break;
3606
3607 default:
3608 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3609 {
3610 tree vecitype, comp_vectype;
3611
3612 /* If the comparison can throw, then is_gimple_condexpr will be
3613 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3614 if (stmt_could_throw_p (cfun, def_stmt))
3615 return false;
3616
3617 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3618 if (comp_vectype == NULL_TREE)
3619 return false;
3620
3621 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3622 if (mask_type
3623 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3624 return false;
3625
3626 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3627 {
3628 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3629 tree itype
3630 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3631 vecitype = get_vectype_for_scalar_type (itype);
3632 if (vecitype == NULL_TREE)
3633 return false;
3634 }
3635 else
3636 vecitype = comp_vectype;
3637 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3638 return false;
3639 }
3640 else
3641 return false;
3642 break;
3643 }
3644
3645 bool res = stmts.add (def_stmt);
3646 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3647 gcc_assert (!res);
3648
3649 return true;
3650 }
3651
3652
3653 /* Helper function of adjust_bool_pattern. Add a cast of VAR (the result
3654 of SSA_NAME_DEF_STMT of VAR) to TYPE, appending the cast stmt to
3655 STMT_INFO's pattern def sequence, and return the cast's result. */
3656
3657 static tree
3658 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3659 {
3660 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3661 NOP_EXPR, var);
3662 append_pattern_def_seq (stmt_info, cast_stmt,
3663 get_vectype_for_scalar_type (type));
3664 return gimple_assign_lhs (cast_stmt);
3665 }
3666
3667 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3668 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3669 type, OUT_TYPE is the desired final integer type of the whole pattern.
3670 STMT_INFO is the info of the pattern root; pattern stmts should be
3671 associated with it. DEFS is a map of pattern defs. */
3672
3673 static void
3674 adjust_bool_pattern (tree var, tree out_type,
3675 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3676 {
3677 gimple *stmt = SSA_NAME_DEF_STMT (var);
3678 enum tree_code rhs_code, def_rhs_code;
3679 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3680 location_t loc;
3681 gimple *pattern_stmt, *def_stmt;
3682 tree trueval = NULL_TREE;
3683
3684 rhs1 = gimple_assign_rhs1 (stmt);
3685 rhs2 = gimple_assign_rhs2 (stmt);
3686 rhs_code = gimple_assign_rhs_code (stmt);
3687 loc = gimple_location (stmt);
3688 switch (rhs_code)
3689 {
3690 case SSA_NAME:
3691 CASE_CONVERT:
3692 irhs1 = *defs.get (rhs1);
3693 itype = TREE_TYPE (irhs1);
3694 pattern_stmt
3695 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3696 SSA_NAME, irhs1);
3697 break;
3698
3699 case BIT_NOT_EXPR:
3700 irhs1 = *defs.get (rhs1);
3701 itype = TREE_TYPE (irhs1);
3702 pattern_stmt
3703 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3704 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3705 break;
3706
3707 case BIT_AND_EXPR:
3708 /* Try to optimize x = y & (a < b ? 1 : 0); into
3709 x = (a < b ? y : 0);
3710
3711 E.g. for:
3712 bool a_b, b_b, c_b;
3713 TYPE d_T;
3714
3715 S1 a_b = x1 CMP1 y1;
3716 S2 b_b = x2 CMP2 y2;
3717 S3 c_b = a_b & b_b;
3718 S4 d_T = (TYPE) c_b;
3719
3720 we would normally emit:
3721
3722 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3723 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3724 S3' c_T = a_T & b_T;
3725 S4' d_T = c_T;
3726
3727 but we can save one stmt by using the
3728 result of one of the COND_EXPRs in the other COND_EXPR and leave
3729 BIT_AND_EXPR stmt out:
3730
3731 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3732 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3733 S4' d_T = c_T;
3734
3735 At least when VEC_COND_EXPR is implemented using masks,
3736 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3737 computes the comparison mask and ANDs it, in one case with an
3738 all-ones vector, in the other case with a vector register.
3739 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3740 often more expensive. */
3741 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3742 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3743 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3744 {
3745 irhs1 = *defs.get (rhs1);
3746 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3747 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3748 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3749 {
3750 rhs_code = def_rhs_code;
3751 rhs1 = def_rhs1;
3752 rhs2 = gimple_assign_rhs2 (def_stmt);
3753 trueval = irhs1;
3754 goto do_compare;
3755 }
3756 else
3757 irhs2 = *defs.get (rhs2);
3758 goto and_ior_xor;
3759 }
3760 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3761 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3762 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3763 {
3764 irhs2 = *defs.get (rhs2);
3765 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3766 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3767 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3768 {
3769 rhs_code = def_rhs_code;
3770 rhs1 = def_rhs1;
3771 rhs2 = gimple_assign_rhs2 (def_stmt);
3772 trueval = irhs2;
3773 goto do_compare;
3774 }
3775 else
3776 irhs1 = *defs.get (rhs1);
3777 goto and_ior_xor;
3778 }
3779 /* FALLTHRU */
3780 case BIT_IOR_EXPR:
3781 case BIT_XOR_EXPR:
3782 irhs1 = *defs.get (rhs1);
3783 irhs2 = *defs.get (rhs2);
3784 and_ior_xor:
3785 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3786 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3787 {
3788 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3789 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3790 int out_prec = TYPE_PRECISION (out_type);
3791 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3792 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3793 stmt_info);
3794 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3795 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3796 stmt_info);
3797 else
3798 {
3799 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3800 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3801 }
3802 }
3803 itype = TREE_TYPE (irhs1);
3804 pattern_stmt
3805 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3806 rhs_code, irhs1, irhs2);
3807 break;
3808
3809 default:
3810 do_compare:
3811 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3812 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3813 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3814 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3815 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3816 {
3817 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3818 itype
3819 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3820 }
3821 else
3822 itype = TREE_TYPE (rhs1);
3823 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3824 if (trueval == NULL_TREE)
3825 trueval = build_int_cst (itype, 1);
3826 else
3827 gcc_checking_assert (useless_type_conversion_p (itype,
3828 TREE_TYPE (trueval)));
3829 pattern_stmt
3830 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3831 COND_EXPR, cond_expr, trueval,
3832 build_int_cst (itype, 0));
3833 break;
3834 }
3835
3836 gimple_set_location (pattern_stmt, loc);
3837 append_pattern_def_seq (stmt_info, pattern_stmt,
3838 get_vectype_for_scalar_type (itype));
3839 defs.put (var, gimple_assign_lhs (pattern_stmt));
3840 }
3841
3842 /* Comparison function to qsort a vector of gimple stmts by UID. */
3843
3844 static int
3845 sort_after_uid (const void *p1, const void *p2)
3846 {
3847 const gimple *stmt1 = *(const gimple * const *)p1;
3848 const gimple *stmt2 = *(const gimple * const *)p2;
3849 return gimple_uid (stmt1) - gimple_uid (stmt2);
3850 }
3851
3852 /* Create pattern stmts for all stmts participating in the bool pattern
3853 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
3854 OUT_TYPE. Return the def of the pattern root. */
3855
3856 static tree
3857 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3858 tree out_type, stmt_vec_info stmt_info)
3859 {
3860 /* Gather original stmts in the bool pattern in their order of appearance
3861 in the IL. */
3862 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3863 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3864 i != bool_stmt_set.end (); ++i)
3865 bool_stmts.quick_push (*i);
3866 bool_stmts.qsort (sort_after_uid);
3867
3868 /* Now process them in that order, producing pattern stmts. */
3869 hash_map <tree, tree> defs;
3870 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3871 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3872 out_type, stmt_info, defs);
3873
3874 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3875 gimple *pattern_stmt
3876 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
3877 return gimple_assign_lhs (pattern_stmt);
3878 }
3879
3880 /* Helper for search_type_for_mask. */
3881
3882 static tree
3883 search_type_for_mask_1 (tree var, vec_info *vinfo,
3884 hash_map<gimple *, tree> &cache)
3885 {
3886 tree rhs1;
3887 enum tree_code rhs_code;
3888 tree res = NULL_TREE, res2;
3889
3890 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3891 return NULL_TREE;
3892
3893 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3894 if (!def_stmt_info)
3895 return NULL_TREE;
3896
3897 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3898 if (!def_stmt)
3899 return NULL_TREE;
3900
3901 tree *c = cache.get (def_stmt);
3902 if (c)
3903 return *c;
3904
3905 rhs_code = gimple_assign_rhs_code (def_stmt);
3906 rhs1 = gimple_assign_rhs1 (def_stmt);
3907
3908 switch (rhs_code)
3909 {
3910 case SSA_NAME:
3911 case BIT_NOT_EXPR:
3912 CASE_CONVERT:
3913 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3914 break;
3915
3916 case BIT_AND_EXPR:
3917 case BIT_IOR_EXPR:
3918 case BIT_XOR_EXPR:
3919 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3920 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3921 cache);
3922 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3923 res = res2;
3924 break;
3925
3926 default:
3927 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3928 {
3929 tree comp_vectype, mask_type;
3930
3931 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3932 {
3933 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3934 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3935 vinfo, cache);
3936 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3937 res = res2;
3938 break;
3939 }
3940
3941 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3942 if (comp_vectype == NULL_TREE)
3943 {
3944 res = NULL_TREE;
3945 break;
3946 }
3947
3948 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3949 if (!mask_type
3950 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3951 {
3952 res = NULL_TREE;
3953 break;
3954 }
3955
3956 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3957 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3958 {
3959 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3960 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3961 }
3962 else
3963 res = TREE_TYPE (rhs1);
3964 }
3965 }
3966
3967 cache.put (def_stmt, res);
3968 return res;
3969 }
3970
3971 /* Return the proper type for converting bool VAR into
3972 an integer value or NULL_TREE if no such type exists.
3973 The type is chosen so that converted value has the
3974 same number of elements as VAR's vector type. */
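/* For instance (an illustrative case, assuming the target can compare
   vectors of 'int' into a mask): if VAR is ultimately defined by a
   comparison 'x < y' with 'int' operands, the function returns a 32-bit
   unsigned integer type, so that a vector of the converted values has the
   same number of lanes as the mask produced by the comparison.  */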
3975
3976 static tree
3977 search_type_for_mask (tree var, vec_info *vinfo)
3978 {
3979 hash_map<gimple *, tree> cache;
3980 return search_type_for_mask_1 (var, vinfo, cache);
3981 }
3982
3983 /* Function vect_recog_bool_pattern
3984
3985 Try to find pattern like following:
3986
3987 bool a_b, b_b, c_b, d_b, e_b;
3988 TYPE f_T;
3989 loop:
3990 S1 a_b = x1 CMP1 y1;
3991 S2 b_b = x2 CMP2 y2;
3992 S3 c_b = a_b & b_b;
3993 S4 d_b = x3 CMP3 y3;
3994 S5 e_b = c_b | d_b;
3995 S6 f_T = (TYPE) e_b;
3996
3997 where type 'TYPE' is an integral type. Or a similar pattern
3998 ending in
3999
4000 S6 f_Y = e_b ? r_Y : s_Y;
4001
4002 as results from if-conversion of a complex condition.
4003
4004 Input:
4005
4006 * STMT_VINFO: The stmt at the end from which the pattern
4007 search begins, i.e. cast of a bool to
4008 an integer type.
4009
4010 Output:
4011
4012 * TYPE_OUT: The type of the output of this pattern.
4013
4014 * Return value: A new stmt that will be used to replace the pattern.
4015
4016 Assuming the size of TYPE is the same as the size of all comparisons
4017 (otherwise some casts would be added where needed), for the above
4018 sequence we create the related pattern stmts:
4019 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4020 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4021 S4' d_T = x3 CMP3 y3 ? 1 : 0;
4022 S5' e_T = c_T | d_T;
4023 S6' f_T = e_T;
4024
4025 Instead of the above S3' we could emit:
4026 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4027 S3' c_T = a_T | b_T;
4028 but the above is more efficient. */
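/* A hypothetical source-level example (illustrative only) that leads to
   the shape above:

     _Bool a = x1[i] < y1[i];
     _Bool b = x2[i] < y2[i];
     out[i] = (int) (a & b);

   The bool-valued comparisons and the BIT_AND_EXPR feed a cast to 'int',
   which is what this pattern rewrites into COND_EXPRs on an integer
   type.  */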
4029
4030 static gimple *
4031 vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
4032 {
4033 gimple *last_stmt = stmt_vinfo->stmt;
4034 enum tree_code rhs_code;
4035 tree var, lhs, rhs, vectype;
4036 vec_info *vinfo = stmt_vinfo->vinfo;
4037 gimple *pattern_stmt;
4038
4039 if (!is_gimple_assign (last_stmt))
4040 return NULL;
4041
4042 var = gimple_assign_rhs1 (last_stmt);
4043 lhs = gimple_assign_lhs (last_stmt);
4044
4045 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4046 return NULL;
4047
4048 hash_set<gimple *> bool_stmts;
4049
4050 rhs_code = gimple_assign_rhs_code (last_stmt);
4051 if (CONVERT_EXPR_CODE_P (rhs_code))
4052 {
4053 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4054 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
4055 return NULL;
4056 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
4057 if (vectype == NULL_TREE)
4058 return NULL;
4059
4060 if (check_bool_pattern (var, vinfo, bool_stmts))
4061 {
4062 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), stmt_vinfo);
4063 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4064 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4065 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4066 else
4067 pattern_stmt
4068 = gimple_build_assign (lhs, NOP_EXPR, rhs);
4069 }
4070 else
4071 {
4072 tree type = search_type_for_mask (var, vinfo);
4073 tree cst0, cst1, tmp;
4074
4075 if (!type)
4076 return NULL;
4077
4078 /* We may directly use the cond with a narrowed type to avoid
4079 multiple cond exprs with subsequent result packing, and
4080 perform a single cond with a packed mask instead. In case
4081 of widening it is better to do the cond first and then
4082 extract the results. */
4083 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4084 type = TREE_TYPE (lhs);
4085
4086 cst0 = build_int_cst (type, 0);
4087 cst1 = build_int_cst (type, 1);
4088 tmp = vect_recog_temp_ssa_var (type, NULL);
4089 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4090
4091 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4092 {
4093 tree new_vectype = get_vectype_for_scalar_type (type);
4094 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
4095
4096 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4097 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4098 }
4099 }
4100
4101 *type_out = vectype;
4102 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4103
4104 return pattern_stmt;
4105 }
4106 else if (rhs_code == COND_EXPR
4107 && TREE_CODE (var) == SSA_NAME)
4108 {
4109 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
4110 if (vectype == NULL_TREE)
4111 return NULL;
4112
4113 /* Build a scalar type for the boolean result that when
4114 vectorized matches the vector type of the result in
4115 size and number of elements. */
4116 unsigned prec
4117 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4118 TYPE_VECTOR_SUBPARTS (vectype));
4119
4120 tree type
4121 = build_nonstandard_integer_type (prec,
4122 TYPE_UNSIGNED (TREE_TYPE (var)));
4123 if (get_vectype_for_scalar_type (type) == NULL_TREE)
4124 return NULL;
4125
4126 if (!check_bool_pattern (var, vinfo, bool_stmts))
4127 return NULL;
4128
4129 rhs = adjust_bool_stmts (bool_stmts, type, stmt_vinfo);
4130
4131 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4132 pattern_stmt
4133 = gimple_build_assign (lhs, COND_EXPR,
4134 build2 (NE_EXPR, boolean_type_node,
4135 rhs, build_int_cst (type, 0)),
4136 gimple_assign_rhs2 (last_stmt),
4137 gimple_assign_rhs3 (last_stmt));
4138 *type_out = vectype;
4139 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4140
4141 return pattern_stmt;
4142 }
4143 else if (rhs_code == SSA_NAME
4144 && STMT_VINFO_DATA_REF (stmt_vinfo))
4145 {
4146 stmt_vec_info pattern_stmt_info;
4147 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
4148 gcc_assert (vectype != NULL_TREE);
4149 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
4150 return NULL;
4151
4152 if (check_bool_pattern (var, vinfo, bool_stmts))
4153 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), stmt_vinfo);
4154 else
4155 {
4156 tree type = search_type_for_mask (var, vinfo);
4157 tree cst0, cst1, new_vectype;
4158
4159 if (!type)
4160 return NULL;
4161
4162 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4163 type = TREE_TYPE (vectype);
4164
4165 cst0 = build_int_cst (type, 0);
4166 cst1 = build_int_cst (type, 1);
4167 new_vectype = get_vectype_for_scalar_type (type);
4168
4169 rhs = vect_recog_temp_ssa_var (type, NULL);
4170 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4171 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
4172 }
4173
4174 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4175 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4176 {
4177 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4178 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4179 append_pattern_def_seq (stmt_vinfo, cast_stmt);
4180 rhs = rhs2;
4181 }
4182 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4183 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4184 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4185 *type_out = vectype;
4186 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4187
4188 return pattern_stmt;
4189 }
4190 else
4191 return NULL;
4192 }
4193
4194
4195 /* A helper for vect_recog_mask_conversion_pattern. Build
4196 conversion of MASK to a type suitable for masking VECTYPE.
4197 The built statement gets the required vectype and is appended
4198 to STMT_VINFO's pattern sequence.
4199
4200 Return converted mask. */
4201
4202 static tree
4203 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4204 {
4205 gimple *stmt;
4206 tree masktype, tmp;
4207
4208 masktype = build_same_sized_truth_vector_type (vectype);
4209 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4210 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4211 append_pattern_def_seq (stmt_vinfo, stmt, masktype);
4212
4213 return tmp;
4214 }
4215
4216
4217 /* Function vect_recog_mask_conversion_pattern
4218
4219 Try to find statements which require boolean type
4220 conversion. Additional conversion statements are
4221 added to handle such cases. For example:
4222
4223 bool m_1, m_2, m_3;
4224 int i_4, i_5;
4225 double d_6, d_7;
4226 char c_1, c_2, c_3;
4227
4228 S1 m_1 = i_4 > i_5;
4229 S2 m_2 = d_6 < d_7;
4230 S3 m_3 = m_1 & m_2;
4231 S4 c_1 = m_3 ? c_2 : c_3;
4232
4233 Will be transformed into:
4234
4235 S1 m_1 = i_4 > i_5;
4236 S2 m_2 = d_6 < d_7;
4237 S3'' m_2' = (_Bool[bitsize=32])m_2
4238 S3' m_3' = m_1 & m_2';
4239 S4'' m_3'' = (_Bool[bitsize=8])m_3'
4240 S4' c_1' = m_3'' ? c_2 : c_3; */
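/* A hypothetical C-level source for the sequence above (illustrative
   only):

     c_1[i] = (i_4[i] > i_5[i] && d_6[i] < d_7[i]) ? c_2[i] : c_3[i];

   The 'int' comparison yields a mask suited to 32-bit elements, the
   'double' comparison one suited to 64-bit elements, and the COND_EXPR on
   'char' needs a mask for 8-bit elements, hence the inserted mask
   conversions.  */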
4241
4242 static gimple *
4243 vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
4244 {
4245 gimple *last_stmt = stmt_vinfo->stmt;
4246 enum tree_code rhs_code;
4247 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4248 tree vectype1, vectype2;
4249 stmt_vec_info pattern_stmt_info;
4250 vec_info *vinfo = stmt_vinfo->vinfo;
4251
4252 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
4253 if (is_gimple_call (last_stmt)
4254 && gimple_call_internal_p (last_stmt))
4255 {
4256 gcall *pattern_stmt;
4257
4258 internal_fn ifn = gimple_call_internal_fn (last_stmt);
4259 int mask_argno = internal_fn_mask_index (ifn);
4260 if (mask_argno < 0)
4261 return NULL;
4262
4263 bool store_p = internal_store_fn_p (ifn);
4264 if (store_p)
4265 {
4266 int rhs_index = internal_fn_stored_value_index (ifn);
4267 tree rhs = gimple_call_arg (last_stmt, rhs_index);
4268 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs));
4269 }
4270 else
4271 {
4272 lhs = gimple_call_lhs (last_stmt);
4273 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
4274 }
4275
4276 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4277 tree mask_arg_type = search_type_for_mask (mask_arg, vinfo);
4278 if (!mask_arg_type)
4279 return NULL;
4280 vectype2 = get_mask_type_for_scalar_type (mask_arg_type);
4281
4282 if (!vectype1 || !vectype2
4283 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4284 TYPE_VECTOR_SUBPARTS (vectype2)))
4285 return NULL;
4286
4287 tmp = build_mask_conversion (mask_arg, vectype1, stmt_vinfo);
4288
4289 auto_vec<tree, 8> args;
4290 unsigned int nargs = gimple_call_num_args (last_stmt);
4291 args.safe_grow (nargs);
4292 for (unsigned int i = 0; i < nargs; ++i)
4293 args[i] = ((int) i == mask_argno
4294 ? tmp
4295 : gimple_call_arg (last_stmt, i));
4296 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
4297
4298 if (!store_p)
4299 {
4300 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4301 gimple_call_set_lhs (pattern_stmt, lhs);
4302 }
4303 gimple_call_set_nothrow (pattern_stmt, true);
4304
4305 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4306 if (STMT_VINFO_DATA_REF (stmt_vinfo))
4307 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4308
4309 *type_out = vectype1;
4310 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4311
4312 return pattern_stmt;
4313 }
4314
4315 if (!is_gimple_assign (last_stmt))
4316 return NULL;
4317
4318 gimple *pattern_stmt;
4319 lhs = gimple_assign_lhs (last_stmt);
4320 rhs1 = gimple_assign_rhs1 (last_stmt);
4321 rhs_code = gimple_assign_rhs_code (last_stmt);
4322
4323 /* Check for cond expression requiring mask conversion. */
4324 if (rhs_code == COND_EXPR)
4325 {
4326 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
4327
4328 if (TREE_CODE (rhs1) == SSA_NAME)
4329 {
4330 rhs1_type = search_type_for_mask (rhs1, vinfo);
4331 if (!rhs1_type)
4332 return NULL;
4333 }
4334 else if (COMPARISON_CLASS_P (rhs1))
4335 {
4336 /* Check whether we're comparing scalar booleans and (if so)
4337 whether a better mask type exists than the mask associated
4338 with boolean-sized elements. This avoids unnecessary packs
4339 and unpacks if the booleans are set from comparisons of
4340 wider types. E.g. in:
4341
4342 int x1, x2, x3, x4, y1, y2;
4343 ...
4344 bool b1 = (x1 == x2);
4345 bool b2 = (x3 == x4);
4346 ... = b1 == b2 ? y1 : y2;
4347
4348 it is better for b1 and b2 to use the mask type associated
4349 with int elements rather than bool (byte) elements. */
4350 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4351 if (!rhs1_type)
4352 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4353 }
4354 else
4355 return NULL;
4356
4357 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4358
4359 if (!vectype1 || !vectype2)
4360 return NULL;
4361
4362 /* Continue if a conversion is needed. Also continue if we have
4363 a comparison whose vector type would normally be different from
4364 VECTYPE2 when considered in isolation. In that case we'll
4365 replace the comparison with an SSA name (so that we can record
4366 its vector type) and behave as though the comparison was an SSA
4367 name from the outset. */
4368 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4369 TYPE_VECTOR_SUBPARTS (vectype2))
4370 && (TREE_CODE (rhs1) == SSA_NAME
4371 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4372 return NULL;
4373
4374 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
4375 in place, we can handle it in vectorizable_condition. This avoids
4376 unnecessary promotion stmts and increased vectorization factor. */
4377 if (COMPARISON_CLASS_P (rhs1)
4378 && INTEGRAL_TYPE_P (rhs1_type)
4379 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4380 TYPE_VECTOR_SUBPARTS (vectype2)))
4381 {
4382 enum vect_def_type dt;
4383 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4384 && dt == vect_external_def
4385 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4386 && (dt == vect_external_def
4387 || dt == vect_constant_def))
4388 {
4389 tree wide_scalar_type = build_nonstandard_integer_type
4390 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4391 TYPE_UNSIGNED (rhs1_type));
4392 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4393 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4394 return NULL;
4395 }
4396 }
4397
4398 /* If rhs1 is a comparison we need to move it into a
4399 separate statement. */
4400 if (TREE_CODE (rhs1) != SSA_NAME)
4401 {
4402 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4403 pattern_stmt = gimple_build_assign (tmp, rhs1);
4404 rhs1 = tmp;
4405 append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2);
4406 }
4407
4408 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4409 TYPE_VECTOR_SUBPARTS (vectype2)))
4410 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4411 else
4412 tmp = rhs1;
4413
4414 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4415 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4416 gimple_assign_rhs2 (last_stmt),
4417 gimple_assign_rhs3 (last_stmt));
4418
4419 *type_out = vectype1;
4420 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4421
4422 return pattern_stmt;
4423 }
4424
4425 /* Now check for binary boolean operations requiring conversion for
4426 one of the operands. */
4427 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4428 return NULL;
4429
4430 if (rhs_code != BIT_IOR_EXPR
4431 && rhs_code != BIT_XOR_EXPR
4432 && rhs_code != BIT_AND_EXPR
4433 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4434 return NULL;
4435
4436 rhs2 = gimple_assign_rhs2 (last_stmt);
4437
4438 rhs1_type = search_type_for_mask (rhs1, vinfo);
4439 rhs2_type = search_type_for_mask (rhs2, vinfo);
4440
4441 if (!rhs1_type || !rhs2_type
4442 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4443 return NULL;
4444
4445 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4446 {
4447 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4448 if (!vectype1)
4449 return NULL;
4450 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo);
4451 }
4452 else
4453 {
4454 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4455 if (!vectype1)
4456 return NULL;
4457 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4458 }
4459
4460 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4461 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4462
4463 *type_out = vectype1;
4464 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4465
4466 return pattern_stmt;
4467 }
4468
4469 /* STMT_INFO is a load or store. If the load or store is conditional, return
4470 the boolean condition under which it occurs, otherwise return null. */
4471
4472 static tree
4473 vect_get_load_store_mask (stmt_vec_info stmt_info)
4474 {
4475 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4476 {
4477 gcc_assert (gimple_assign_single_p (def_assign));
4478 return NULL_TREE;
4479 }
4480
4481 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4482 {
4483 internal_fn ifn = gimple_call_internal_fn (def_call);
4484 int mask_index = internal_fn_mask_index (ifn);
4485 return gimple_call_arg (def_call, mask_index);
4486 }
4487
4488 gcc_unreachable ();
4489 }
4490
4491 /* Return the scalar offset type that an internal gather/scatter function
4492 should use. GS_INFO describes the gather/scatter operation. */
4493
4494 static tree
4495 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4496 {
4497 tree offset_type = TREE_TYPE (gs_info->offset);
4498 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4499
4500 /* Enforced by vect_check_gather_scatter. */
4501 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4502 gcc_assert (element_bits >= offset_bits);
4503
4504 /* If the offset is narrower than the elements, extend it according
4505 to its sign. */
4506 if (element_bits > offset_bits)
4507 return build_nonstandard_integer_type (element_bits,
4508 TYPE_UNSIGNED (offset_type));
4509
4510 return offset_type;
4511 }
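/* E.g. (an illustrative case) when gathering 64-bit elements with a 32-bit
   signed 'int' offset, ELEMENT_BITS is 64 and OFFSET_BITS is 32, so a
   signed 64-bit integer type is returned and the offsets are sign-extended
   before being passed to the internal function.  */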
4512
4513 /* Return MASK if MASK is suitable for masking an operation on vectors
4514 of type VECTYPE, otherwise convert it into such a form and return
4515 the result. Associate any conversion statements with STMT_INFO's
4516 pattern. */
4517
4518 static tree
4519 vect_convert_mask_for_vectype (tree mask, tree vectype,
4520 stmt_vec_info stmt_info, vec_info *vinfo)
4521 {
4522 tree mask_type = search_type_for_mask (mask, vinfo);
4523 if (mask_type)
4524 {
4525 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4526 if (mask_vectype
4527 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4528 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4529 mask = build_mask_conversion (mask, vectype, stmt_info);
4530 }
4531 return mask;
4532 }
4533
4534 /* Return the equivalent of:
4535
4536 fold_convert (TYPE, VALUE)
4537
4538 with the expectation that the operation will be vectorized.
4539 If new statements are needed, add them as pattern statements
4540 to STMT_INFO. */
4541
4542 static tree
4543 vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
4544 {
4545 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4546 return value;
4547
4548 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4549 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4550 append_pattern_def_seq (stmt_info, conversion,
4551 get_vectype_for_scalar_type (type));
4552 return new_value;
4553 }
4554
4555 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4556 internal function. Return the final statement on success and set
4557 *TYPE_OUT to the vector type being loaded or stored.
4558
4559 This function only handles gathers and scatters that were recognized
4560 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
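/* As a rough sketch (hypothetical loop, assuming the target provides the
   corresponding internal function):

     for (int i = 0; i < n; ++i)
       out[i] = base[idx[i]];

   would be rewritten into a call such as

     _1 = GATHER_LOAD (base, offset, scale);

   where OFFSET is idx[i] converted to the element width, and an extra mask
   operand is passed when the access is conditional.  */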
4561
4562 static gimple *
4563 vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
4564 {
4565 /* Currently we only support this for loop vectorization. */
4566 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4567 if (!loop_vinfo)
4568 return NULL;
4569
4570 /* Make sure that we're looking at a gather load or scatter store. */
4571 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4572 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4573 return NULL;
4574
4575 /* Get the boolean that controls whether the load or store happens.
4576 This is null if the operation is unconditional. */
4577 tree mask = vect_get_load_store_mask (stmt_info);
4578
4579 /* Make sure that the target supports an appropriate internal
4580 function for the gather/scatter operation. */
4581 gather_scatter_info gs_info;
4582 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4583 || gs_info.decl)
4584 return NULL;
4585
4586 /* Convert the mask to the right form. */
4587 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4588 if (mask)
4589 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4590 loop_vinfo);
4591
4592 /* Get the invariant base and non-invariant offset, converting the
4593 latter to the same width as the vector elements. */
4594 tree base = gs_info.base;
4595 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4596 tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4597 stmt_info);
4598
4599 /* Build the new pattern statement. */
4600 tree scale = size_int (gs_info.scale);
4601 gcall *pattern_stmt;
4602 if (DR_IS_READ (dr))
4603 {
4604 if (mask != NULL)
4605 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4606 offset, scale, mask);
4607 else
4608 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4609 offset, scale);
4610 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4611 gimple_call_set_lhs (pattern_stmt, load_lhs);
4612 }
4613 else
4614 {
4615 tree rhs = vect_get_store_rhs (stmt_info);
4616 if (mask != NULL)
4617 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4618 base, offset, scale, rhs,
4619 mask);
4620 else
4621 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4622 base, offset, scale, rhs);
4623 }
4624 gimple_call_set_nothrow (pattern_stmt, true);
4625
4626 /* Copy across relevant vectorization info and associate DR with the
4627 new pattern statement instead of the original statement. */
4628 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4629 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4630
4631 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4632 *type_out = vectype;
4633 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4634
4635 return pattern_stmt;
4636 }
4637
4638 /* Return true if TYPE is a non-boolean integer type. These are the types
4639 that we want to consider for narrowing. */
4640
4641 static bool
4642 vect_narrowable_type_p (tree type)
4643 {
4644 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4645 }
4646
4647 /* Return true if the operation given by CODE can be truncated to N bits
4648 when only N bits of the output are needed. This is only true if bit N+1
4649 of the inputs has no effect on the low N bits of the result. */
4650
4651 static bool
4652 vect_truncatable_operation_p (tree_code code)
4653 {
4654 switch (code)
4655 {
4656 case PLUS_EXPR:
4657 case MINUS_EXPR:
4658 case MULT_EXPR:
4659 case BIT_AND_EXPR:
4660 case BIT_IOR_EXPR:
4661 case BIT_XOR_EXPR:
4662 case COND_EXPR:
4663 return true;
4664
4665 default:
4666 return false;
4667 }
4668 }
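/* For example (illustrative only), in

     unsigned short a, b;
     unsigned char res = (unsigned char) (a + b);

   only the low 8 bits of the PLUS_EXPR matter, and bits 8 and above of A
   and B cannot affect them, so the addition could be done on unsigned
   chars.  A right shift, by contrast, is not truncatable: discarded high
   input bits do affect the low output bits.  */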
4669
4670 /* Record that STMT_INFO could be changed from operating on TYPE to
4671 operating on a type with the precision and sign given by PRECISION
4672 and SIGN respectively. PRECISION is an arbitrary bit precision;
4673 it might not be a whole number of bytes. */
4674
4675 static void
4676 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4677 unsigned int precision, signop sign)
4678 {
4679 /* Round the precision up to a whole number of bytes. */
4680 precision = vect_element_precision (precision);
4681 if (precision < TYPE_PRECISION (type)
4682 && (!stmt_info->operation_precision
4683 || stmt_info->operation_precision > precision))
4684 {
4685 stmt_info->operation_precision = precision;
4686 stmt_info->operation_sign = sign;
4687 }
4688 }
4689
4690 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4691 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4692 is an arbitrary bit precision; it might not be a whole number of bytes. */
4693
4694 static void
4695 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4696 unsigned int min_input_precision)
4697 {
4698 /* This operation in isolation only requires the inputs to have
4699 MIN_INPUT_PRECISION of precision. However, that doesn't mean
4700 that MIN_INPUT_PRECISION is a natural precision for the chain
4701 as a whole. E.g. consider something like:
4702
4703 unsigned short *x, *y;
4704 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4705
4706 The right shift can be done on unsigned chars, and only requires the
4707 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4708 approach would mean turning a natural chain of single-vector unsigned
4709 short operations into one that truncates "*x" and then extends
4710 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4711 operation and one vector for each unsigned char operation.
4712 This would be a significant pessimization.
4713
4714 Instead only propagate the maximum of this precision and the precision
4715 required by the users of the result. This means that we don't pessimize
4716 the case above but continue to optimize things like:
4717
4718 unsigned char *y;
4719 unsigned short *x;
4720 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4721
4722 Here we would truncate two vectors of *x to a single vector of
4723 unsigned chars and use single-vector unsigned char operations for
4724 everything else, rather than doing two unsigned short copies of
4725 "(*x & 0xf0) >> 4" and then truncating the result. */
4726 min_input_precision = MAX (min_input_precision,
4727 stmt_info->min_output_precision);
4728
4729 if (min_input_precision < TYPE_PRECISION (type)
4730 && (!stmt_info->min_input_precision
4731 || stmt_info->min_input_precision > min_input_precision))
4732 stmt_info->min_input_precision = min_input_precision;
4733 }
4734
4735 /* Subroutine of vect_determine_min_output_precision. Return true if
4736 we can calculate a reduced number of output bits for STMT_INFO,
4737 whose result is LHS. */
4738
4739 static bool
4740 vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
4741 {
4742 /* Take the maximum precision required by users of the result. */
4743 vec_info *vinfo = stmt_info->vinfo;
4744 unsigned int precision = 0;
4745 imm_use_iterator iter;
4746 use_operand_p use;
4747 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4748 {
4749 gimple *use_stmt = USE_STMT (use);
4750 if (is_gimple_debug (use_stmt))
4751 continue;
4752 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4753 if (!use_stmt_info || !use_stmt_info->min_input_precision)
4754 return false;
4755 /* The input precision recorded for COND_EXPRs applies only to the
4756 "then" and "else" values. */
4757 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
4758 if (assign
4759 && gimple_assign_rhs_code (assign) == COND_EXPR
4760 && use->use != gimple_assign_rhs2_ptr (assign)
4761 && use->use != gimple_assign_rhs3_ptr (assign))
4762 return false;
4763 precision = MAX (precision, use_stmt_info->min_input_precision);
4764 }
4765
4766 if (dump_enabled_p ())
4767 dump_printf_loc (MSG_NOTE, vect_location,
4768 "only the low %d bits of %T are significant\n",
4769 precision, lhs);
4770 stmt_info->min_output_precision = precision;
4771 return true;
4772 }
4773
4774 /* Calculate min_output_precision for STMT_INFO. */
4775
4776 static void
4777 vect_determine_min_output_precision (stmt_vec_info stmt_info)
4778 {
4779 /* We're only interested in statements with a narrowable result. */
4780 tree lhs = gimple_get_lhs (stmt_info->stmt);
4781 if (!lhs
4782 || TREE_CODE (lhs) != SSA_NAME
4783 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4784 return;
4785
4786 if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
4787 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4788 }
4789
4790 /* Use range information to decide whether STMT (described by STMT_INFO)
4791 could be done in a narrower type. This is effectively a forward
4792 propagation, since it uses context-independent information that applies
4793 to all users of an SSA name. */
4794
4795 static void
4796 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4797 {
4798 tree lhs = gimple_assign_lhs (stmt);
4799 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4800 return;
4801
4802 tree type = TREE_TYPE (lhs);
4803 if (!vect_narrowable_type_p (type))
4804 return;
4805
4806 /* First see whether we have any useful range information for the result. */
4807 unsigned int precision = TYPE_PRECISION (type);
4808 signop sign = TYPE_SIGN (type);
4809 wide_int min_value, max_value;
4810 if (!vect_get_range_info (lhs, &min_value, &max_value))
4811 return;
4812
4813 tree_code code = gimple_assign_rhs_code (stmt);
4814 unsigned int nops = gimple_num_ops (stmt);
4815
4816 if (!vect_truncatable_operation_p (code))
4817 /* Check that all relevant input operands are compatible, and update
4818 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4819 for (unsigned int i = 1; i < nops; ++i)
4820 {
4821 tree op = gimple_op (stmt, i);
4822 if (TREE_CODE (op) == INTEGER_CST)
4823 {
4824 /* Don't require the integer to have TYPE (which it might
4825 not for things like shift amounts, etc.), but do require it
4826 to fit the type. */
4827 if (!int_fits_type_p (op, type))
4828 return;
4829
4830 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4831 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4832 }
4833 else if (TREE_CODE (op) == SSA_NAME)
4834 {
4835 /* Ignore codes that don't take uniform arguments. */
4836 if (!types_compatible_p (TREE_TYPE (op), type))
4837 return;
4838
4839 wide_int op_min_value, op_max_value;
4840 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4841 return;
4842
4843 min_value = wi::min (min_value, op_min_value, sign);
4844 max_value = wi::max (max_value, op_max_value, sign);
4845 }
4846 else
4847 return;
4848 }
4849
4850 /* Try to switch signed types for unsigned types if we can.
4851 This is better for two reasons. First, unsigned ops tend
4852 to be cheaper than signed ops. Second, it means that we can
4853 handle things like:
4854
4855 signed char c;
4856 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4857
4858 as:
4859
4860 signed char c;
4861 unsigned short res_1 = (unsigned short) c & 0xff00;
4862 int res = (int) res_1;
4863
4864 where the intermediate result res_1 has unsigned rather than
4865 signed type. */
4866 if (sign == SIGNED && !wi::neg_p (min_value))
4867 sign = UNSIGNED;
4868
4869 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4870 unsigned int precision1 = wi::min_precision (min_value, sign);
4871 unsigned int precision2 = wi::min_precision (max_value, sign);
4872 unsigned int value_precision = MAX (precision1, precision2);
4873 if (value_precision >= precision)
4874 return;
4875
4876 if (dump_enabled_p ())
4877 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4878 " without loss of precision: %G",
4879 sign == SIGNED ? "signed" : "unsigned",
4880 value_precision, stmt);
4881
4882 vect_set_operation_type (stmt_info, type, value_precision, sign);
4883 vect_set_min_input_precision (stmt_info, type, value_precision);
4884 }
4885
4886 /* Use information about the users of STMT's result to decide whether
4887 STMT (described by STMT_INFO) could be done in a narrower type.
4888 This is effectively a backward propagation. */
4889
4890 static void
4891 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4892 {
4893 tree_code code = gimple_assign_rhs_code (stmt);
4894 unsigned int opno = (code == COND_EXPR ? 2 : 1);
4895 tree type = TREE_TYPE (gimple_op (stmt, opno));
4896 if (!vect_narrowable_type_p (type))
4897 return;
4898
4899 unsigned int precision = TYPE_PRECISION (type);
4900 unsigned int operation_precision, min_input_precision;
4901 switch (code)
4902 {
4903 CASE_CONVERT:
4904 /* Only the bits that contribute to the output matter. Don't change
4905 the precision of the operation itself. */
4906 operation_precision = precision;
4907 min_input_precision = stmt_info->min_output_precision;
4908 break;
4909
4910 case LSHIFT_EXPR:
4911 case RSHIFT_EXPR:
4912 {
4913 tree shift = gimple_assign_rhs2 (stmt);
4914 if (TREE_CODE (shift) != INTEGER_CST
4915 || !wi::ltu_p (wi::to_widest (shift), precision))
4916 return;
4917 unsigned int const_shift = TREE_INT_CST_LOW (shift);
4918 if (code == LSHIFT_EXPR)
4919 {
4920 /* We need CONST_SHIFT fewer bits of the input. */
4921 operation_precision = stmt_info->min_output_precision;
4922 min_input_precision = (MAX (operation_precision, const_shift)
4923 - const_shift);
4924 }
4925 else
4926 {
4927 /* We need CONST_SHIFT extra bits to do the operation. */
4928 operation_precision = (stmt_info->min_output_precision
4929 + const_shift);
4930 min_input_precision = operation_precision;
4931 }
4932 break;
4933 }
4934
4935 default:
4936 if (vect_truncatable_operation_p (code))
4937 {
4938 /* Input bit N has no effect on output bits N-1 and lower. */
4939 operation_precision = stmt_info->min_output_precision;
4940 min_input_precision = operation_precision;
4941 break;
4942 }
4943 return;
4944 }
4945
4946 if (operation_precision < precision)
4947 {
4948 if (dump_enabled_p ())
4949 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4950 " without affecting users: %G",
4951 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
4952 operation_precision, stmt);
4953 vect_set_operation_type (stmt_info, type, operation_precision,
4954 TYPE_SIGN (type));
4955 }
4956 vect_set_min_input_precision (stmt_info, type, min_input_precision);
4957 }
4958
4959 /* Handle vect_determine_precisions for STMT_INFO, given that we
4960 have already done so for the users of its result. */
4961
4962 void
4963 vect_determine_stmt_precisions (stmt_vec_info stmt_info)
4964 {
4965 vect_determine_min_output_precision (stmt_info);
4966 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
4967 {
4968 vect_determine_precisions_from_range (stmt_info, stmt);
4969 vect_determine_precisions_from_users (stmt_info, stmt);
4970 }
4971 }
4972
4973 /* Walk backwards through the vectorizable region to determine the
4974 values of these fields:
4975
4976 - min_output_precision
4977 - min_input_precision
4978 - operation_precision
4979 - operation_sign. */
4980
4981 void
4982 vect_determine_precisions (vec_info *vinfo)
4983 {
4984 DUMP_VECT_SCOPE ("vect_determine_precisions");
4985
4986 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4987 {
4988 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4989 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4990 unsigned int nbbs = loop->num_nodes;
4991
4992 for (unsigned int i = 0; i < nbbs; i++)
4993 {
4994 basic_block bb = bbs[nbbs - i - 1];
4995 for (gimple_stmt_iterator si = gsi_last_bb (bb);
4996 !gsi_end_p (si); gsi_prev (&si))
4997 vect_determine_stmt_precisions
4998 (vinfo->lookup_stmt (gsi_stmt (si)));
4999 }
5000 }
5001 else
5002 {
5003 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5004 gimple_stmt_iterator si = bb_vinfo->region_end;
5005 gimple *stmt;
5006 do
5007 {
5008 if (!gsi_stmt (si))
5009 si = gsi_last_bb (bb_vinfo->bb);
5010 else
5011 gsi_prev (&si);
5012 stmt = gsi_stmt (si);
5013 stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
5014 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5015 vect_determine_stmt_precisions (stmt_info);
5016 }
5017 while (stmt != gsi_stmt (bb_vinfo->region_begin));
5018 }
5019 }
5020
5021 typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *);
5022
5023 struct vect_recog_func
5024 {
5025 vect_recog_func_ptr fn;
5026 const char *name;
5027 };
5028
5029 /* Note that ordering matters - the first pattern matching on a stmt is
5030 taken, which means usually the more complex one needs to precede the
5031 less complex ones (widen_sum only after dot_prod or sad, for example). */
5032 static vect_recog_func vect_vect_recog_func_ptrs[] = {
5033 { vect_recog_over_widening_pattern, "over_widening" },
5034 /* Must come after over_widening, which narrows the shift as much as
5035 possible beforehand. */
5036 { vect_recog_average_pattern, "average" },
5037 { vect_recog_mulhs_pattern, "mult_high" },
5038 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
5039 { vect_recog_widen_mult_pattern, "widen_mult" },
5040 { vect_recog_dot_prod_pattern, "dot_prod" },
5041 { vect_recog_sad_pattern, "sad" },
5042 { vect_recog_widen_sum_pattern, "widen_sum" },
5043 { vect_recog_pow_pattern, "pow" },
5044 { vect_recog_widen_shift_pattern, "widen_shift" },
5045 { vect_recog_rotate_pattern, "rotate" },
5046 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
5047 { vect_recog_divmod_pattern, "divmod" },
5048 { vect_recog_mult_pattern, "mult" },
5049 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
5050 { vect_recog_bool_pattern, "bool" },
5051 /* This must come before mask conversion, and includes the parts
5052 of mask conversion that are needed for gather and scatter
5053 internal functions. */
5054 { vect_recog_gather_scatter_pattern, "gather_scatter" },
5055 { vect_recog_mask_conversion_pattern, "mask_conversion" }
5056 };
5057
5058 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
5059
5060 /* Mark statements that are involved in a pattern. */
5061
5062 static inline void
5063 vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
5064 tree pattern_vectype)
5065 {
5066 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5067
5068 gimple *orig_pattern_stmt = NULL;
5069 if (is_pattern_stmt_p (orig_stmt_info))
5070 {
5071 /* We're replacing a statement in an existing pattern definition
5072 sequence. */
5073 orig_pattern_stmt = orig_stmt_info->stmt;
5074 if (dump_enabled_p ())
5075 dump_printf_loc (MSG_NOTE, vect_location,
5076 "replacing earlier pattern %G", orig_pattern_stmt);
5077
5078 /* To keep the book-keeping simple, just swap the lhs of the
5079 old and new statements, so that the old one has a valid but
5080 unused lhs. */
5081 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
5082 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
5083 gimple_set_lhs (pattern_stmt, old_lhs);
5084
5085 if (dump_enabled_p ())
5086 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
5087
5088 /* Switch to the statement that ORIG replaces. */
5089 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
5090
5091 /* We shouldn't be replacing the main pattern statement. */
5092 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
5093 != orig_pattern_stmt);
5094 }
5095
5096 if (def_seq)
5097 for (gimple_stmt_iterator si = gsi_start (def_seq);
5098 !gsi_end_p (si); gsi_next (&si))
5099 {
5100 stmt_vec_info pattern_stmt_info
5101 = vect_init_pattern_stmt (gsi_stmt (si),
5102 orig_stmt_info, pattern_vectype);
5103 /* Stmts in the def sequence are not vectorizable cycle or
5104 induction defs; instead they should all be vect_internal_def
5105 feeding the main pattern stmt, which retains this def type. */
5106 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
5107 }
5108
5109 if (orig_pattern_stmt)
5110 {
5111 vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
5112
5113 /* Insert all the new pattern statements before the original one. */
5114 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5115 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
5116 orig_def_seq);
5117 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
5118 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
5119
5120 /* Remove the pattern statement that this new pattern replaces. */
5121 gsi_remove (&gsi, false);
5122 }
5123 else
5124 vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
5125 }
5126
5127 /* Function vect_pattern_recog_1
5128
5129 Input:
5130 RECOG_FUNC: A pointer to an entry in vect_vect_recog_func_ptrs describing
5131 the function that detects a certain computation pattern.
5132 STMT_INFO: A stmt from which the pattern search should start.
5133
5134 If RECOG_FUNC successfully detects the pattern, it creates
5135 a sequence of statements that has the same functionality and can be
5136 used to replace STMT_INFO. It returns the last statement in the sequence
5137 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
5138 RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
5139 statement, having first checked that the target supports the new operation
5140 in that type.
5141
5142 This function also does some bookkeeping, as explained in the documentation
5143 for vect_pattern_recog. */
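
/* A minimal sketch of the shape of a recognition callback, matching the
   vect_recog_func_ptr typedef above.  It is purely illustrative - no such
   recognizer exists in the table - and "matches" a bitwise AND with a
   constant only to exercise the contract just described: bail out with
   NULL if the stmt does not have the right shape or no vector type is
   available, otherwise build the replacement under a fresh SSA name,
   set *TYPE_OUT and return the new stmt.  Helper signatures such as
   get_vectype_for_scalar_type vary between releases, so treat this as a
   sketch rather than something to paste in verbatim:

     static gimple *
     vect_recog_example_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
     {
       gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
       if (!last_stmt
           || gimple_assign_rhs_code (last_stmt) != BIT_AND_EXPR
           || TREE_CODE (gimple_assign_rhs2 (last_stmt)) != INTEGER_CST)
         return NULL;

       tree type = TREE_TYPE (gimple_assign_lhs (last_stmt));
       tree vectype = get_vectype_for_scalar_type (type);
       if (!vectype)
         return NULL;

       tree var = vect_recog_temp_ssa_var (type, NULL);
       gimple *pattern_stmt
         = gimple_build_assign (var, BIT_AND_EXPR,
                                gimple_assign_rhs1 (last_stmt),
                                gimple_assign_rhs2 (last_stmt));
       *type_out = vectype;
       return pattern_stmt;
     }

   vect_pattern_recog_1 and vect_mark_pattern_stmts then take care of
   linking the returned stmt to the original one via RELATED_STMT.  */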
5144
5145 static void
5146 vect_pattern_recog_1 (vect_recog_func *recog_func, stmt_vec_info stmt_info)
5147 {
5148 vec_info *vinfo = stmt_info->vinfo;
5149 gimple *pattern_stmt;
5150 loop_vec_info loop_vinfo;
5151 tree pattern_vectype;
5152
5153 /* If this statement has already been replaced with pattern statements,
5154 leave the original statement alone, since the first match wins.
5155 Instead try to match against the definition statements that feed
5156 the main pattern statement. */
5157 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5158 {
5159 gimple_stmt_iterator gsi;
5160 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5161 !gsi_end_p (gsi); gsi_next (&gsi))
5162 vect_pattern_recog_1 (recog_func, vinfo->lookup_stmt (gsi_stmt (gsi)));
5163 return;
5164 }
5165
5166 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5167 pattern_stmt = recog_func->fn (stmt_info, &pattern_vectype);
5168 if (!pattern_stmt)
5169 {
5170 /* Clear any half-formed pattern definition sequence. */
5171 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
5172 return;
5173 }
5174
5175 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5176 gcc_assert (pattern_vectype);
5177
5178 /* Found a vectorizable pattern. */
5179 if (dump_enabled_p ())
5180 dump_printf_loc (MSG_NOTE, vect_location,
5181 "%s pattern recognized: %G",
5182 recog_func->name, pattern_stmt);
5183
5184 /* Mark the stmts that are involved in the pattern. */
5185 vect_mark_pattern_stmts (stmt_info, pattern_stmt, pattern_vectype);
5186
5187 /* Patterns cannot be vectorized using SLP, because they change the order of
5188 computation. */
5189 if (loop_vinfo)
5190 {
5191 unsigned ix, ix2;
5192 stmt_vec_info *elem_ptr;
5193 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
5194 elem_ptr, *elem_ptr == stmt_info);
5195 }
5196 }
5197
5198
5199 /* Function vect_pattern_recog
5200
5201 Input:
5202 VINFO - the vec_info for a loop or basic-block region in which we want
5203 to look for computation idioms.
5204
5205 Output - for each computation idiom that is detected we create a new stmt
5206 that provides the same functionality and that can be vectorized. We
5207 also record some information in the stmt_vec_info of the relevant
5208 stmts, as explained below:
5209
5210 At the entry to this function we have the following stmts, with the
5211 following initial value in the STMT_VINFO fields:
5212
5213 stmt in_pattern_p related_stmt vec_stmt
5214 S1: a_i = .... - - -
5215 S2: a_2 = ..use(a_i).. - - -
5216 S3: a_1 = ..use(a_2).. - - -
5217 S4: a_0 = ..use(a_1).. - - -
5218 S5: ... = ..use(a_0).. - - -
5219
5220 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
5221 represented by a single stmt. We then:
5222 - create a new stmt S6 equivalent to the pattern (the stmt is not
5223 inserted into the code)
5224 - fill in the STMT_VINFO fields as follows:
5225
5226 in_pattern_p related_stmt vec_stmt
5227 S1: a_i = .... - - -
5228 S2: a_2 = ..use(a_i).. - - -
5229 S3: a_1 = ..use(a_2).. - - -
5230 S4: a_0 = ..use(a_1).. true S6 -
5231 '---> S6: a_new = .... - S4 -
5232 S5: ... = ..use(a_0).. - - -
5233
5234 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
5235 to each other through the RELATED_STMT field).
5236
5237 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
5238 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
5239 remain irrelevant unless used by stmts other than S4.
5240
5241 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
5242 (because they are marked as irrelevant). It will vectorize S6, and record
5243 a pointer to the new vector stmt VS6 from S6 (as usual).
5244 S4 will be skipped, and S5 will be vectorized as usual:
5245
5246 in_pattern_p related_stmt vec_stmt
5247 S1: a_i = .... - - -
5248 S2: a_2 = ..use(a_i).. - - -
5249 S3: a_1 = ..use(a_2).. - - -
5250 > VS6: va_new = .... - - -
5251 S4: a_0 = ..use(a_1).. true S6 VS6
5252 '---> S6: a_new = .... - S4 VS6
5253 > VS5: ... = ..vuse(va_new).. - - -
5254 S5: ... = ..use(a_0).. - - -
5255
5256 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
5257 elsewhere), and we'll end up with:
5258
5259 VS6: va_new = ....
5260 VS5: ... = ..vuse(va_new)..
5261
5262 In case of more than one pattern statements, e.g., widen-mult with
5263 intermediate type:
5264
5265 S1: a_t = ...;
5266 S2: a_T = (TYPE) a_t;
5267 '--> S3: a_it = (interm_type) a_t;
5268 S4: prod_T = a_T * CONST;
5269 '--> S5: prod_T' = a_it w* CONST;
5270
5271 there may be other users of a_T outside the pattern. In that case S2 will
5272 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
5273 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
5274 be recorded in S3. */
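
/* As a concrete (purely illustrative) source-level instance of the
   multi-statement case above - the intermediate type actually chosen
   depends on the target - source such as

     signed char a_t;
     int prod_T = a_t * 50;

   can be recognized as a multiplication carried out in a 16-bit
   intermediate type followed by a widening to int, i.e. roughly

     short a_it = (short) a_t;
     int prod_T = a_it w* 50;

   where "w*" stands for the widening multiplication used by the
   widen_mult pattern.  */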
5275
5276 void
5277 vect_pattern_recog (vec_info *vinfo)
5278 {
5279 class loop *loop;
5280 basic_block *bbs;
5281 unsigned int nbbs;
5282 gimple_stmt_iterator si;
5283 unsigned int i, j;
5284
5285 vect_determine_precisions (vinfo);
5286
5287 DUMP_VECT_SCOPE ("vect_pattern_recog");
5288
5289 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5290 {
5291 loop = LOOP_VINFO_LOOP (loop_vinfo);
5292 bbs = LOOP_VINFO_BBS (loop_vinfo);
5293 nbbs = loop->num_nodes;
5294
5295 /* Scan through the loop stmts, applying the pattern recognition
5296 functions starting at each stmt visited: */
5297 for (i = 0; i < nbbs; i++)
5298 {
5299 basic_block bb = bbs[i];
5300 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5301 {
5302 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
5303 /* Scan over all generic vect_recog_xxx_pattern functions. */
5304 for (j = 0; j < NUM_PATTERNS; j++)
5305 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j],
5306 stmt_info);
5307 }
5308 }
5309 }
5310 else
5311 {
5312 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5313 for (si = bb_vinfo->region_begin;
5314 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
5315 {
5316 gimple *stmt = gsi_stmt (si);
5317 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
5318 if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
5319 continue;
5320
5321 /* Scan over all generic vect_recog_xxx_pattern functions. */
5322 for (j = 0; j < NUM_PATTERNS; j++)
5323 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], stmt_info);
5324 }
5325 }
5326 }