1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2024 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "gimple-iterator.h"
29 #include "gimple-fold.h"
30 #include "ssa.h"
31 #include "expmed.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "tree-eh.h"
38 #include "gimplify.h"
39 #include "gimple-iterator.h"
40 #include "gimple-fold.h"
41 #include "gimplify-me.h"
42 #include "cfgloop.h"
43 #include "tree-vectorizer.h"
44 #include "dumpfile.h"
45 #include "builtins.h"
46 #include "internal-fn.h"
47 #include "case-cfn-macros.h"
48 #include "fold-const-call.h"
49 #include "attribs.h"
50 #include "cgraph.h"
51 #include "omp-simd-clone.h"
52 #include "predict.h"
53 #include "tree-vector-builder.h"
54 #include "vec-perm-indices.h"
55 #include "gimple-range.h"
56
57
58 /* TODO: Note the vectorizer still builds COND_EXPRs with GENERIC compares
59 in the first operand. Disentangling this is future work; the
60 IL is properly transferred to VEC_COND_EXPRs with separate compares. */
61
62
63 /* Return true if we have a useful VR_RANGE range for VAR, storing it
64 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
65
66 bool
67 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
68 {
69 value_range vr;
70 tree vr_min, vr_max;
71 get_range_query (cfun)->range_of_expr (vr, var);
72 if (vr.undefined_p ())
73 vr.set_varying (TREE_TYPE (var));
74 value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
75 *min_value = wi::to_wide (vr_min);
76 *max_value = wi::to_wide (vr_max);
77 wide_int nonzero = get_nonzero_bits (var);
78 signop sgn = TYPE_SIGN (TREE_TYPE (var));
79 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
80 nonzero, sgn) == VR_RANGE)
81 {
82 if (dump_enabled_p ())
83 {
84 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
85 dump_printf (MSG_NOTE, " has range [");
86 dump_hex (MSG_NOTE, *min_value);
87 dump_printf (MSG_NOTE, ", ");
88 dump_hex (MSG_NOTE, *max_value);
89 dump_printf (MSG_NOTE, "]\n");
90 }
91 return true;
92 }
93 else
94 {
95 if (dump_enabled_p ())
96 {
97 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
98 dump_printf (MSG_NOTE, " has no range info\n");
99 }
100 return false;
101 }
102 }
103
104 /* Report that we've found an instance of the pattern NAME in
105 statement STMT. */
106
107 static void
108 vect_pattern_detected (const char *name, gimple *stmt)
109 {
110 if (dump_enabled_p ())
111 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
112 }
113
114 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
115 return the pattern statement's stmt_vec_info. Set its vector type to
116 VECTYPE if it doesn't have one already. */
117
118 static stmt_vec_info
119 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
120 stmt_vec_info orig_stmt_info, tree vectype)
121 {
122 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
123 if (pattern_stmt_info == NULL)
124 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
125 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
126
127 pattern_stmt_info->pattern_stmt_p = true;
128 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
129 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
130 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
131 STMT_VINFO_TYPE (pattern_stmt_info) = STMT_VINFO_TYPE (orig_stmt_info);
132 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
133 {
134 gcc_assert (!vectype
135 || is_a <gcond *> (pattern_stmt)
136 || (VECTOR_BOOLEAN_TYPE_P (vectype)
137 == vect_use_mask_type_p (orig_stmt_info)));
138 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
139 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
140 }
141 return pattern_stmt_info;
142 }
143
144 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
145 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
146 have one already. */
147
148 static void
149 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
150 stmt_vec_info orig_stmt_info, tree vectype)
151 {
152 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
153 STMT_VINFO_RELATED_STMT (orig_stmt_info)
154 = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
155 }
156
157 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
158 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
159 be different from the vector type of the final pattern statement.
160 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
161 from which it was derived. */
162
163 static inline void
164 append_pattern_def_seq (vec_info *vinfo,
165 stmt_vec_info stmt_info, gimple *new_stmt,
166 tree vectype = NULL_TREE,
167 tree scalar_type_for_mask = NULL_TREE)
168 {
169 gcc_assert (!scalar_type_for_mask
170 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
171 if (vectype)
172 {
173 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
174 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
175 if (scalar_type_for_mask)
176 new_stmt_info->mask_precision
177 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
178 }
179 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
180 new_stmt);
181 }
182
183 /* The caller wants to perform new operations on vect_external variable
184 VAR, so that the result of the operations would also be vect_external.
185 Return the edge on which the operations can be performed, if one exists.
186 Return null if the operations should instead be treated as part of
187 the pattern that needs them. */
188
189 static edge
190 vect_get_external_def_edge (vec_info *vinfo, tree var)
191 {
192 edge e = NULL;
193 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
194 {
195 e = loop_preheader_edge (loop_vinfo->loop);
196 if (!SSA_NAME_IS_DEFAULT_DEF (var))
197 {
198 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
199 if (bb == NULL
200 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
201 e = NULL;
202 }
203 }
204 return e;
205 }
206
207 /* Return true if the target supports a vector version of CODE,
208 where CODE is known to map to a direct optab with the given SUBTYPE.
209 ITYPE specifies the type of (some of) the scalar inputs and OTYPE
210 specifies the type of the scalar result.
211
212 If CODE allows the inputs and outputs to have different type
213 (such as for WIDEN_SUM_EXPR), it is the input mode rather
214 than the output mode that determines the appropriate target pattern.
215 Operand 0 of the target pattern then specifies the mode that the output
216 must have.
217
218 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
219 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
220 is nonnull. */
221
222 static bool
223 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
224 tree itype, tree *vecotype_out,
225 tree *vecitype_out = NULL,
226 enum optab_subtype subtype = optab_default)
227 {
228 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
229 if (!vecitype)
230 return false;
231
232 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
233 if (!vecotype)
234 return false;
235
236 optab optab = optab_for_tree_code (code, vecitype, subtype);
237 if (!optab)
238 return false;
239
240 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
241 if (icode == CODE_FOR_nothing
242 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
243 return false;
244
245 *vecotype_out = vecotype;
246 if (vecitype_out)
247 *vecitype_out = vecitype;
248 return true;
249 }
250
251 /* Round bit precision PRECISION up to a full element. */
252
253 static unsigned int
254 vect_element_precision (unsigned int precision)
255 {
256 precision = 1 << ceil_log2 (precision);
257 return MAX (precision, BITS_PER_UNIT);
258 }
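/* An illustrative note, not in the original source: a few sample results of
   vect_element_precision, assuming the usual BITS_PER_UNIT of 8.

     vect_element_precision (1)  == 8     // rounded up to a full byte
     vect_element_precision (12) == 16    // next power of two
     vect_element_precision (17) == 32
     vect_element_precision (32) == 32    // already a full element

   ceil_log2 rounds the precision up to a power of two and the MAX with
   BITS_PER_UNIT guarantees at least one byte.  */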
259
260 /* If OP is defined by a statement that's being considered for vectorization,
261 return information about that statement, otherwise return NULL. */
262
263 static stmt_vec_info
264 vect_get_internal_def (vec_info *vinfo, tree op)
265 {
266 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
267 if (def_stmt_info
268 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
269 return def_stmt_info;
270 return NULL;
271 }
272
273 /* Check whether NAME, an ssa-name used in STMT_VINFO,
274 is a result of a type promotion, such that:
275 DEF_STMT: NAME = NOP (name0)
276 If CHECK_SIGN is TRUE, check that either both types are signed or both are
277 unsigned. */
278
279 static bool
280 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
281 tree *orig_type, gimple **def_stmt, bool *promotion)
282 {
283 tree type = TREE_TYPE (name);
284 tree oprnd0;
285 enum vect_def_type dt;
286
287 stmt_vec_info def_stmt_info;
288 if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
289 return false;
290
291 if (dt != vect_internal_def
292 && dt != vect_external_def && dt != vect_constant_def)
293 return false;
294
295 if (!*def_stmt)
296 return false;
297
298 if (!is_gimple_assign (*def_stmt))
299 return false;
300
301 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
302 return false;
303
304 oprnd0 = gimple_assign_rhs1 (*def_stmt);
305
306 *orig_type = TREE_TYPE (oprnd0);
307 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
308 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
309 return false;
310
311 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
312 *promotion = true;
313 else
314 *promotion = false;
315
316 if (!vect_is_simple_use (oprnd0, vinfo, &dt))
317 return false;
318
319 return true;
320 }
321
322 /* Holds information about an input operand after some sign changes
323 and type promotions have been peeled away. */
324 class vect_unpromoted_value {
325 public:
326 vect_unpromoted_value ();
327
328 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
329
330 /* The value obtained after peeling away zero or more casts. */
331 tree op;
332
333 /* The type of OP. */
334 tree type;
335
336 /* The definition type of OP. */
337 vect_def_type dt;
338
339 /* If OP is the result of peeling at least one cast, and if the cast
340 of OP itself is a vectorizable statement, CASTER identifies that
341 statement, otherwise it is null. */
342 stmt_vec_info caster;
343 };
344
345 inline vect_unpromoted_value::vect_unpromoted_value ()
346 : op (NULL_TREE),
347 type (NULL_TREE),
348 dt (vect_uninitialized_def),
349 caster (NULL)
350 {
351 }
352
353 /* Set the operand to OP_IN, its definition type to DT_IN, and the
354 statement that casts it to CASTER_IN. */
355
356 inline void
357 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
358 stmt_vec_info caster_in)
359 {
360 op = op_in;
361 type = TREE_TYPE (op);
362 dt = dt_in;
363 caster = caster_in;
364 }
365
366 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
367 to reach some vectorizable inner operand OP', continuing as long as it
368 is possible to convert OP' back to OP using a possible sign change
369 followed by a possible promotion P. Return this OP', or null if OP is
370 not a vectorizable SSA name. If there is a promotion P, describe its
371 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
372 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
373 have more than one user.
374
375 A successful return means that it is possible to go from OP' to OP
376 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
377 whereas the cast from UNPROM to OP might be a promotion, a sign
378 change, or a nop.
379
380 E.g. say we have:
381
382 signed short *ptr = ...;
383 signed short C = *ptr;
384 unsigned short B = (unsigned short) C; // sign change
385 signed int A = (signed int) B; // unsigned promotion
386 ...possible other uses of A...
387 unsigned int OP = (unsigned int) A; // sign change
388
389 In this case it's possible to go directly from C to OP using:
390
391 OP = (unsigned int) (unsigned short) C;
392 +------------+ +--------------+
393 promotion sign change
394
395 so OP' would be C. The input to the promotion is B, so UNPROM
396 would describe B. */
397
398 static tree
399 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
400 vect_unpromoted_value *unprom,
401 bool *single_use_p = NULL)
402 {
403 tree op_type = TREE_TYPE (op);
404 if (!INTEGRAL_TYPE_P (op_type))
405 return NULL_TREE;
406
407 tree res = NULL_TREE;
408 unsigned int orig_precision = TYPE_PRECISION (op_type);
409 unsigned int min_precision = orig_precision;
410 stmt_vec_info caster = NULL;
411 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
412 {
413 /* See whether OP is simple enough to vectorize. */
414 stmt_vec_info def_stmt_info;
415 gimple *def_stmt;
416 vect_def_type dt;
417 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
418 break;
419
420 /* If OP is the input of a demotion, skip over it to see whether
421 OP is itself the result of a promotion. If so, the combined
422 effect of the promotion and the demotion might fit the required
423 pattern, otherwise neither operation fits.
424
425 This copes with cases such as the result of an arithmetic
426 operation being truncated before being stored, and where that
427 arithmetic operation has been recognized as an over-widened one. */
428 if (TYPE_PRECISION (op_type) <= min_precision)
429 {
430 /* Use OP as the UNPROM described above if we haven't yet
431 found a promotion, or if using the new input preserves the
432 sign of the previous promotion. */
433 if (!res
434 || TYPE_PRECISION (unprom->type) == orig_precision
435 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
436 {
437 unprom->set_op (op, dt, caster);
438 min_precision = TYPE_PRECISION (op_type);
439 }
440 /* Stop if we've already seen a promotion and if this
441 conversion does more than change the sign. */
442 else if (TYPE_PRECISION (op_type)
443 != TYPE_PRECISION (unprom->type))
444 break;
445
446 /* The sequence now extends to OP. */
447 res = op;
448 }
449
450 /* See whether OP is defined by a cast. Record it as CASTER if
451 the cast is potentially vectorizable. */
452 if (!def_stmt)
453 break;
454 caster = def_stmt_info;
455
456 /* Ignore pattern statements, since we don't link uses for them. */
457 if (caster
458 && single_use_p
459 && !STMT_VINFO_RELATED_STMT (caster)
460 && !has_single_use (res))
461 *single_use_p = false;
462
463 gassign *assign = dyn_cast <gassign *> (def_stmt);
464 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
465 break;
466
467 /* Continue with the input to the cast. */
468 op = gimple_assign_rhs1 (def_stmt);
469 op_type = TREE_TYPE (op);
470 }
471 return res;
472 }
473
474 /* OP is an integer operand to an operation that returns TYPE, and we
475 want to treat the operation as a widening one. So far we can treat
476 it as widening from *COMMON_TYPE.
477
478 Return true if OP is suitable for such a widening operation,
479 either widening from *COMMON_TYPE or from some supertype of it.
480 Update *COMMON_TYPE to the supertype in the latter case.
481
482 SHIFT_P is true if OP is a shift amount. */
483
484 static bool
485 vect_joust_widened_integer (tree type, bool shift_p, tree op,
486 tree *common_type)
487 {
488 /* Calculate the minimum precision required by OP, without changing
489 the sign of either operand. */
490 unsigned int precision;
491 if (shift_p)
492 {
493 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
494 return false;
495 precision = TREE_INT_CST_LOW (op);
496 }
497 else
498 {
499 precision = wi::min_precision (wi::to_widest (op),
500 TYPE_SIGN (*common_type));
501 if (precision * 2 > TYPE_PRECISION (type))
502 return false;
503 }
504
505 /* If OP requires a wider type, switch to that type. The checks
506 above ensure that this is still narrower than the result. */
507 precision = vect_element_precision (precision);
508 if (TYPE_PRECISION (*common_type) < precision)
509 *common_type = build_nonstandard_integer_type
510 (precision, TYPE_UNSIGNED (*common_type));
511 return true;
512 }
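/* An illustrative example, not in the original source: suppose TYPE is
   32-bit int, *COMMON_TYPE is unsigned char and OP is the constant 300
   (SHIFT_P false).  Then

     wi::min_precision (300, UNSIGNED) == 9    // needs more than 8 bits
     9 * 2 <= 32                               // still narrower than TYPE
     vect_element_precision (9) == 16

   so *COMMON_TYPE is widened to a 16-bit unsigned type and the function
   returns true.  For OP == 100000 the check fails (17 * 2 > 32) and the
   function returns false.  */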
513
514 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
515 is narrower than TYPE, storing the supertype in *COMMON_TYPE if so. */
516
517 static bool
518 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
519 {
520 if (types_compatible_p (*common_type, new_type))
521 return true;
522
523 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
524 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
525 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
526 return true;
527
528 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
529 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
530 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
531 {
532 *common_type = new_type;
533 return true;
534 }
535
536 /* We have mismatched signs, with the signed type being
537 no wider than the unsigned type. In this case we need
538 a wider signed type. */
539 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
540 TYPE_PRECISION (new_type));
541 precision *= 2;
542
543 if (precision * 2 > TYPE_PRECISION (type))
544 return false;
545
546 *common_type = build_nonstandard_integer_type (precision, false);
547 return true;
548 }
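/* An illustrative example, not in the original source: with TYPE being
   32-bit int, a signed char NEW_TYPE and an unsigned char *COMMON_TYPE have
   mismatched signs and neither can hold all values of the other, so the
   final branch applies:

     precision = MAX (8, 8) * 2 == 16,  and 16 * 2 <= 32

   giving a signed 16-bit *COMMON_TYPE.  With 16-bit inputs the same
   computation would need a signed 32-bit common type, which is no longer
   narrower than TYPE, so the function would return false.  */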
549
550 /* Check whether STMT_INFO can be viewed as a tree of integer operations
551 in which each node either performs CODE or WIDENED_CODE, and where
552 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
553 specifies the maximum number of leaf operands. SHIFT_P says whether
554 CODE and WIDENED_CODE are some sort of shift.
555
556 If STMT_INFO is such a tree, return the number of leaf operands
557 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
558 to a type that (a) is narrower than the result of STMT_INFO and
559 (b) can hold all leaf operand values.
560
561 If SUBTYPE is nonnull, allow the signs of the operands to differ
562 but require the precisions to match. SUBTYPE is updated to reflect
563 this.
564
565 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
566 exists. */
567
568 static unsigned int
569 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
570 code_helper widened_code, bool shift_p,
571 unsigned int max_nops,
572 vect_unpromoted_value *unprom, tree *common_type,
573 enum optab_subtype *subtype = NULL)
574 {
575 /* Check for an integer operation with the right code. */
576 gimple* stmt = stmt_info->stmt;
577 if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
578 return 0;
579
580 code_helper rhs_code;
581 if (is_gimple_assign (stmt))
582 rhs_code = gimple_assign_rhs_code (stmt);
583 else if (is_gimple_call (stmt))
584 rhs_code = gimple_call_combined_fn (stmt);
585 else
586 return 0;
587
588 if (rhs_code != code
589 && rhs_code != widened_code)
590 return 0;
591
592 tree lhs = gimple_get_lhs (stmt);
593 tree type = TREE_TYPE (lhs);
594 if (!INTEGRAL_TYPE_P (type))
595 return 0;
596
597 /* Assume that both operands will be leaf operands. */
598 max_nops -= 2;
599
600 /* Check the operands. */
601 unsigned int next_op = 0;
602 for (unsigned int i = 0; i < 2; ++i)
603 {
604 vect_unpromoted_value *this_unprom = &unprom[next_op];
605 unsigned int nops = 1;
606 tree op = gimple_arg (stmt, i);
607 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
608 {
609 /* We already have a common type from earlier operands.
610 Update it to account for OP. */
611 this_unprom->set_op (op, vect_constant_def);
612 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
613 return 0;
614 }
615 else
616 {
617 /* Only allow shifts by constants. */
618 if (shift_p && i == 1)
619 return 0;
620
621 if (rhs_code != code)
622 {
623 /* If rhs_code is widened_code, don't look through further
624 possible promotions; there is a promotion already embedded
625 in the WIDEN_*_EXPR. */
626 if (TREE_CODE (op) != SSA_NAME
627 || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
628 return 0;
629
630 stmt_vec_info def_stmt_info;
631 gimple *def_stmt;
632 vect_def_type dt;
633 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
634 &def_stmt))
635 return 0;
636 this_unprom->set_op (op, dt, NULL);
637 }
638 else if (!vect_look_through_possible_promotion (vinfo, op,
639 this_unprom))
640 return 0;
641
642 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
643 {
644 /* The operand isn't widened. If STMT_INFO has the code
645 for an unwidened operation, recursively check whether
646 this operand is a node of the tree. */
647 if (rhs_code != code
648 || max_nops == 0
649 || this_unprom->dt != vect_internal_def)
650 return 0;
651
652 /* Give back the leaf slot allocated above now that we're
653 not treating this as a leaf operand. */
654 max_nops += 1;
655
656 /* Recursively process the definition of the operand. */
657 stmt_vec_info def_stmt_info
658 = vinfo->lookup_def (this_unprom->op);
659 nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
660 widened_code, shift_p, max_nops,
661 this_unprom, common_type,
662 subtype);
663 if (nops == 0)
664 return 0;
665
666 max_nops -= nops;
667 }
668 else
669 {
670 /* Make sure that the operand is narrower than the result. */
671 if (TYPE_PRECISION (this_unprom->type) * 2
672 > TYPE_PRECISION (type))
673 return 0;
674
675 /* Update COMMON_TYPE for the new operand. */
676 if (i == 0)
677 *common_type = this_unprom->type;
678 else if (!vect_joust_widened_type (type, this_unprom->type,
679 common_type))
680 {
681 if (subtype)
682 {
683 /* See if we can sign extend the smaller type. */
684 if (TYPE_PRECISION (this_unprom->type)
685 > TYPE_PRECISION (*common_type))
686 *common_type = this_unprom->type;
687 *subtype = optab_vector_mixed_sign;
688 }
689 else
690 return 0;
691 }
692 }
693 }
694 next_op += nops;
695 }
696 return next_op;
697 }
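/* An illustrative example, not in the original source: given scalar code
   such as

     short a, b, c, d;
     int res = ((int) a + (int) b) + ((int) c + (int) d);

   calling vect_widened_op_tree on the outer addition with CODE PLUS_EXPR
   and MAX_NOPS 4 recurses into both inner additions (their results are not
   narrower than the int result) and returns 4, describing the unpromoted
   leaves a, b, c and d in UNPROM[0..3] with *COMMON_TYPE short.  Callers
   such as the dot-product and SAD patterns below pass MAX_NOPS 2.  */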
698
699 /* Helper to return a new temporary of type TYPE for pattern statement STMT.
700 If STMT is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
701
702 static tree
703 vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
704 {
705 return make_temp_ssa_name (type, stmt, "patt");
706 }
707
708 /* STMT2_INFO describes a type conversion that could be split into STMT1
709 followed by a version of STMT2_INFO that takes NEW_RHS as its first
710 input. Try to do this using pattern statements, returning true on
711 success. */
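/* An illustrative example, not in the original source: if STMT2_INFO is the
   conversion

     int _2 = (int) c_1;     // c_1 has type signed char

   and the caller wants an intermediate short value, it can build

     STMT1:    patt_3 = (short) c_1;
     NEW_RHS:  patt_3

   and call vect_split_statement, after which the original conversion
   becomes int _2 = (int) patt_3 with STMT1 added to the pattern definition
   sequence.  This is how vect_convert_input below taps into a mid-way
   point of an existing cast.  */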
712
713 static bool
714 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
715 gimple *stmt1, tree vectype)
716 {
717 if (is_pattern_stmt_p (stmt2_info))
718 {
719 /* STMT2_INFO is part of a pattern. Get the statement to which
720 the pattern is attached. */
721 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
722 vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
723
724 if (dump_enabled_p ())
725 dump_printf_loc (MSG_NOTE, vect_location,
726 "Splitting pattern statement: %G", stmt2_info->stmt);
727
728 /* Since STMT2_INFO is a pattern statement, we can change it
729 in-situ without worrying about changing the code for the
730 containing block. */
731 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
732
733 if (dump_enabled_p ())
734 {
735 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
736 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
737 stmt2_info->stmt);
738 }
739
740 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
741 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
742 /* STMT2_INFO is the actual pattern statement. Add STMT1
743 to the end of the definition sequence. */
744 gimple_seq_add_stmt_without_update (def_seq, stmt1);
745 else
746 {
747 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
748 before it. */
749 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
750 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
751 }
752 return true;
753 }
754 else
755 {
756 /* STMT2_INFO doesn't yet have a pattern. Try to create a
757 two-statement pattern now. */
758 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
759 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
760 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
761 if (!lhs_vectype)
762 return false;
763
764 if (dump_enabled_p ())
765 dump_printf_loc (MSG_NOTE, vect_location,
766 "Splitting statement: %G", stmt2_info->stmt);
767
768 /* Add STMT1 as a singleton pattern definition sequence. */
769 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
770 vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
771 gimple_seq_add_stmt_without_update (def_seq, stmt1);
772
773 /* Build the second of the two pattern statements. */
774 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
775 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
776 vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
777
778 if (dump_enabled_p ())
779 {
780 dump_printf_loc (MSG_NOTE, vect_location,
781 "into pattern statements: %G", stmt1);
782 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
783 (gimple *) new_stmt2);
784 }
785
786 return true;
787 }
788 }
789
790 /* Look for the following pattern
791 X = x[i]
792 Y = y[i]
793 DIFF = X - Y
794 DAD = ABS_EXPR<DIFF>
795
796 ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
797 HALF_TYPE and UNPROM will be set should the statement be found to
798 be a widened operation.
799 DIFF_STMT will be set to the MINUS_EXPR
800 statement that precedes the ABS_STMT unless vect_widened_op_tree
801 succeeds.
802 */
803 static bool
804 vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
805 tree *half_type,
806 vect_unpromoted_value unprom[2],
807 gassign **diff_stmt)
808 {
809 if (!abs_stmt)
810 return false;
811
812 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
813 inside the loop (in case we are analyzing an outer-loop). */
814 enum tree_code code = gimple_assign_rhs_code (abs_stmt);
815 if (code != ABS_EXPR && code != ABSU_EXPR)
816 return false;
817
818 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
819 if (!abs_oprnd)
820 return false;
821 tree abs_type = TREE_TYPE (abs_oprnd);
822 if (!ANY_INTEGRAL_TYPE_P (abs_type)
823 || TYPE_OVERFLOW_WRAPS (abs_type)
824 || TYPE_UNSIGNED (abs_type))
825 return false;
826
827 /* Peel off conversions from the ABS input. This can involve sign
828 changes (e.g. from an unsigned subtraction to a signed ABS input)
829 or signed promotion, but it can't include unsigned promotion.
830 (Note that ABS of an unsigned promotion should have been folded
831 away before now anyway.) */
832 vect_unpromoted_value unprom_diff;
833 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
834 &unprom_diff);
835 if (!abs_oprnd)
836 return false;
837 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
838 && TYPE_UNSIGNED (unprom_diff.type))
839 return false;
840
841 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
842 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
843 if (!diff_stmt_vinfo)
844 return false;
845
846 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
847 inside the loop (in case we are analyzing an outer-loop). */
848 if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
849 MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
850 false, 2, unprom, half_type))
851 return true;
852
853 /* Failed to find a widening operation, so we check for a regular MINUS_EXPR. */
854 gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
855 if (diff_stmt && diff
856 && gimple_assign_rhs_code (diff) == MINUS_EXPR
857 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
858 {
859 *diff_stmt = diff;
860 *half_type = NULL_TREE;
861 return true;
862 }
863
864 return false;
865 }
866
867 /* Convert UNPROM to TYPE and return the result, adding new statements
868 to STMT_INFO's pattern definition statements if no better way is
869 available. VECTYPE is the vector form of TYPE.
870
871 If SUBTYPE then convert the type based on the subtype. */
872
873 static tree
874 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
875 vect_unpromoted_value *unprom, tree vectype,
876 enum optab_subtype subtype = optab_default)
877 {
878 /* Update the type if the signs differ. */
879 if (subtype == optab_vector_mixed_sign)
880 {
881 gcc_assert (!TYPE_UNSIGNED (type));
882 if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
883 {
884 type = unsigned_type_for (type);
885 vectype = unsigned_type_for (vectype);
886 }
887 }
888
889 /* Check for a no-op conversion. */
890 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
891 return unprom->op;
892
893 /* Allow the caller to create constant vect_unpromoted_values. */
894 if (TREE_CODE (unprom->op) == INTEGER_CST)
895 return wide_int_to_tree (type, wi::to_widest (unprom->op));
896
897 tree input = unprom->op;
898 if (unprom->caster)
899 {
900 tree lhs = gimple_get_lhs (unprom->caster->stmt);
901 tree lhs_type = TREE_TYPE (lhs);
902
903 /* If the result of the existing cast is the right width, use it
904 instead of the source of the cast. */
905 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
906 input = lhs;
907 /* If the precision we want is between the source and result
908 precisions of the existing cast, try splitting the cast into
909 two and tapping into a mid-way point. */
910 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
911 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
912 {
913 /* In order to preserve the semantics of the original cast,
914 give the mid-way point the same signedness as the input value.
915
916 It would be possible to use a signed type here instead if
917 TYPE is signed and UNPROM->TYPE is unsigned, but that would
918 make the sign of the midtype sensitive to the order in
919 which we process the statements, since the signedness of
920 TYPE is the signedness required by just one of possibly
921 many users. Also, unsigned promotions are usually as cheap
922 as or cheaper than signed ones, so it's better to keep an
923 unsigned promotion. */
924 tree midtype = build_nonstandard_integer_type
925 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
926 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
927 if (vec_midtype)
928 {
929 input = vect_recog_temp_ssa_var (midtype, NULL);
930 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
931 unprom->op);
932 if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
933 vec_midtype))
934 append_pattern_def_seq (vinfo, stmt_info,
935 new_stmt, vec_midtype);
936 }
937 }
938
939 /* See if we can reuse an existing result. */
940 if (types_compatible_p (type, TREE_TYPE (input)))
941 return input;
942 }
943
944 /* We need a new conversion statement. */
945 tree new_op = vect_recog_temp_ssa_var (type, NULL);
946 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
947
948 /* If OP is an external value, see if we can insert the new statement
949 on an incoming edge. */
950 if (input == unprom->op && unprom->dt == vect_external_def)
951 if (edge e = vect_get_external_def_edge (vinfo, input))
952 {
953 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
954 gcc_assert (!new_bb);
955 return new_op;
956 }
957
958 /* As a (common) last resort, add the statement to the pattern itself. */
959 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
960 return new_op;
961 }
962
963 /* Invoke vect_convert_input for N elements of UNPROM and store the
964 result in the corresponding elements of RESULT.
965
966 If SUBTYPE then convert the type based on the subtype. */
967
968 static void
969 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
970 tree *result, tree type, vect_unpromoted_value *unprom,
971 tree vectype, enum optab_subtype subtype = optab_default)
972 {
973 for (unsigned int i = 0; i < n; ++i)
974 {
975 unsigned int j;
976 for (j = 0; j < i; ++j)
977 if (unprom[j].op == unprom[i].op)
978 break;
979
980 if (j < i)
981 result[i] = result[j];
982 else
983 result[i] = vect_convert_input (vinfo, stmt_info,
984 type, &unprom[i], vectype, subtype);
985 }
986 }
987
988 /* The caller has created a (possibly empty) sequence of pattern definition
989 statements followed by a single statement PATTERN_STMT. Cast the result
990 of this final statement to TYPE. If a new statement is needed, add
991 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
992 and return the new statement, otherwise return PATTERN_STMT as-is.
993 VECITYPE is the vector form of PATTERN_STMT's result type. */
994
995 static gimple *
996 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
997 gimple *pattern_stmt, tree vecitype)
998 {
999 tree lhs = gimple_get_lhs (pattern_stmt);
1000 if (!types_compatible_p (type, TREE_TYPE (lhs)))
1001 {
1002 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
1003 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
1004 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
1005 }
1006 return pattern_stmt;
1007 }
1008
1009 /* Return true if STMT_INFO describes a reduction for which reassociation
1010 is allowed. If STMT_INFO is part of a group, assume that it's part of
1011 a reduction chain and optimistically assume that all statements
1012 except the last allow reassociation.
1013 Also require it to have code CODE and to be a reduction
1014 in the outermost loop. When returning true, store the operands in
1015 *OP0_OUT and *OP1_OUT. */
1016
1017 static bool
1018 vect_reassociating_reduction_p (vec_info *vinfo,
1019 stmt_vec_info stmt_info, tree_code code,
1020 tree *op0_out, tree *op1_out)
1021 {
1022 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
1023 if (!loop_info)
1024 return false;
1025
1026 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
1027 if (!assign || gimple_assign_rhs_code (assign) != code)
1028 return false;
1029
1030 /* We don't allow changing the order of the computation in the inner-loop
1031 when doing outer-loop vectorization. */
1032 class loop *loop = LOOP_VINFO_LOOP (loop_info);
1033 if (loop && nested_in_vect_loop_p (loop, stmt_info))
1034 return false;
1035
1036 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
1037 {
1038 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
1039 code))
1040 return false;
1041 }
1042 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
1043 return false;
1044
1045 *op0_out = gimple_assign_rhs1 (assign);
1046 *op1_out = gimple_assign_rhs2 (assign);
1047 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
1048 std::swap (*op0_out, *op1_out);
1049 return true;
1050 }
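/* An illustrative note, not in the original source: the summation

     double s = 0;
     for (int i = 0; i < n; ++i)
       s += a[i];

   is a reduction, but without reassociation being allowed (e.g. no
   -ffast-math) needs_fold_left_reduction_p is true for floating-point
   addition, so vect_reassociating_reduction_p rejects it; an integer sum
   of the same shape is accepted.  */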
1051
1052 /* match.pd function to match
1053 (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
1054 with conditions:
1055 1) @1, @2, c, d, a and b all have integral types.
1056 2) Both @1 and @2 are single-use.
1057 3) a and c have the same precision.
1058 4) c and @1 have different precisions.
1059 5) c and d have the same type, or they may differ in sign when the
1060 conversion is a truncation.
1061
1062 Record a, c, d and @3. */
1063
1064 extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
1065
1066 /* Function vect_recog_cond_expr_convert
1067
1068 Try to find the following pattern:
1069
1070 TYPE_AB A,B;
1071 TYPE_CD C,D;
1072 TYPE_E E;
1073 TYPE_E op_true = (TYPE_E) A;
1074 TYPE_E op_false = (TYPE_E) B;
1075
1076 E = C cmp D ? op_true : op_false;
1077
1078 where
1079 TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
1080 TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
1081 single_use of op_true and op_false.
1082 TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
1083
1084 Input:
1085
1086 * STMT_VINFO: The stmt from which the pattern search begins.
1087 here it starts with E = C cmp D ? op_true : op_false;
1088
1089 Output:
1090
1091 TYPE1 E' = C cmp D ? A : B;
1092 TYPE3 E = (TYPE3) E';
1093
1094 There may be an extra nop_convert for A or B to handle a difference in sign.
1095
1096 * TYPE_OUT: The vector type of the output of this pattern.
1097
1098 * Return value: A new stmt that will be used to replace the sequence of
1099 stmts that constitute the pattern. In this case it will be:
1100 E = (TYPE3)E';
1101 E' = C cmp D ? A : B; is recorded in pattern definition statements; */
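/* An illustrative example, not in the original source (the function name is
   made up): a scalar form of the shape handled here, with int comparison
   operands, int data and a long result.

     long
     cond_convert (int c, int d, int a, int b)
     {
       return c < d ? (long) a : (long) b;
     }

   The pattern performs the selection in the narrower type and converts the
   selected value once:

     int tmp = c < d ? a : b;
     return (long) tmp;

   so that the vector COND_EXPR uses the same element width as the
   comparison operands instead of mixing 64-bit selects with 32-bit
   compares.  */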
1102
1103 static gimple *
1104 vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
1105 stmt_vec_info stmt_vinfo, tree *type_out)
1106 {
1107 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1108 tree lhs, match[4], temp, type, new_lhs, op2;
1109 gimple *cond_stmt;
1110 gimple *pattern_stmt;
1111
1112 if (!last_stmt)
1113 return NULL;
1114
1115 lhs = gimple_assign_lhs (last_stmt);
1116
1117 /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B;
1118 where TYPE_PRECISION (A) == TYPE_PRECISION (C). */
1119 if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
1120 return NULL;
1121
1122 vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
1123
1124 op2 = match[2];
1125 type = TREE_TYPE (match[1]);
1126 if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
1127 {
1128 op2 = vect_recog_temp_ssa_var (type, NULL);
1129 gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
1130 append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
1131 get_vectype_for_scalar_type (vinfo, type));
1132 }
1133
1134 temp = vect_recog_temp_ssa_var (type, NULL);
1135 cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
1136 match[1], op2));
1137 append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
1138 get_vectype_for_scalar_type (vinfo, type));
1139 new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
1140 pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
1141 *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
1142
1143 if (dump_enabled_p ())
1144 dump_printf_loc (MSG_NOTE, vect_location,
1145 "created pattern stmt: %G", pattern_stmt);
1146 return pattern_stmt;
1147 }
1148
1149 /* Function vect_recog_dot_prod_pattern
1150
1151 Try to find the following pattern:
1152
1153 type1a x_t;
1154 type1b y_t;
1155 TYPE1 prod;
1156 TYPE2 sum = init;
1157 loop:
1158 sum_0 = phi <init, sum_1>
1159 S1 x_t = ...
1160 S2 y_t = ...
1161 S3 x_T = (TYPE1) x_t;
1162 S4 y_T = (TYPE1) y_t;
1163 S5 prod = x_T * y_T;
1164 [S6 prod = (TYPE2) prod; #optional]
1165 S7 sum_1 = prod + sum_0;
1166
1167 where 'TYPE1' is exactly double the size of types 'type1a' and 'type1b';
1168 the sign of 'TYPE1' must match the sign of either 'type1a' or 'type1b',
1169 though 'type1a' and 'type1b' may differ in sign.
1170
1171 Input:
1172
1173 * STMT_VINFO: The stmt from which the pattern search begins. In the
1174 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1175 will be detected.
1176
1177 Output:
1178
1179 * TYPE_OUT: The type of the output of this pattern.
1180
1181 * Return value: A new stmt that will be used to replace the sequence of
1182 stmts that constitute the pattern. In this case it will be:
1183 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1184
1185 Note: The dot-prod idiom is a widening reduction pattern that is
1186 vectorized without preserving all the intermediate results. It
1187 produces only N/2 (widened) results (by summing up pairs of
1188 intermediate results) rather than all N results. Therefore, we
1189 cannot allow this pattern when we want to get all the results and in
1190 the correct order (as is the case when this computation is in an
1191 inner-loop nested in an outer-loop that is being vectorized). */
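/* An illustrative example, not in the original source (the function name is
   made up): the classic scalar form of the dot-product idiom, assuming
   signed char inputs and an int accumulator.

     int
     dot_prod (signed char *x, signed char *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; ++i)
	 sum += x[i] * y[i];	// S3-S7: widening multiply feeding the sum
       return sum;
     }

   With a DOT_PROD_EXPR optab the loop body becomes a single dot-product
   statement per vector iteration, accumulating narrow products into wider
   lanes.  */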
1192
1193 static gimple *
1194 vect_recog_dot_prod_pattern (vec_info *vinfo,
1195 stmt_vec_info stmt_vinfo, tree *type_out)
1196 {
1197 tree oprnd0, oprnd1;
1198 gimple *last_stmt = stmt_vinfo->stmt;
1199 tree type, half_type;
1200 gimple *pattern_stmt;
1201 tree var;
1202
1203 /* Look for the following pattern
1204 DX = (TYPE1) X;
1205 DY = (TYPE1) Y;
1206 DPROD = DX * DY;
1207 DDPROD = (TYPE2) DPROD;
1208 sum_1 = DDPROD + sum_0;
1209 In which
1210 - DX is double the size of X
1211 - DY is double the size of Y
1212 - DX, DY and DPROD all have the same type, but the signs of
1213 X, Y and DPROD may differ.
1214 - sum is the same size as DPROD or bigger
1215 - sum has been recognized as a reduction variable.
1216
1217 This is equivalent to:
1218 DPROD = X w* Y; #widen mult
1219 sum_1 = DPROD w+ sum_0; #widen summation
1220 or
1221 DPROD = X w* Y; #widen mult
1222 sum_1 = DPROD + sum_0; #summation
1223 */
1224
1225 /* Starting from LAST_STMT, follow the defs of its uses in search
1226 of the above pattern. */
1227
1228 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1229 &oprnd0, &oprnd1))
1230 return NULL;
1231
1232 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1233
1234 vect_unpromoted_value unprom_mult;
1235 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
1236
1237 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1238 we know that oprnd1 is the reduction variable (defined by a loop-header
1239 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1240 Left to check that oprnd0 is defined by a (widen_)mult_expr */
1241 if (!oprnd0)
1242 return NULL;
1243
1244 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
1245 if (!mult_vinfo)
1246 return NULL;
1247
1248 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1249 inside the loop (in case we are analyzing an outer-loop). */
1250 vect_unpromoted_value unprom0[2];
1251 enum optab_subtype subtype = optab_vector;
1252 if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
1253 false, 2, unprom0, &half_type, &subtype))
1254 return NULL;
1255
1256 /* If there are two widening operations, make sure they agree on the sign
1257 of the extension. The result of an optab_vector_mixed_sign operation
1258 is signed; otherwise, the result has the same sign as the operands. */
1259 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1260 && (subtype == optab_vector_mixed_sign
1261 ? TYPE_UNSIGNED (unprom_mult.type)
1262 : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1263 return NULL;
1264
1265 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1266
1267 /* If the inputs have mixed signs, canonicalize on using the signed
1268 input type for analysis. This also helps when emulating mixed-sign
1269 operations using signed operations. */
1270 if (subtype == optab_vector_mixed_sign)
1271 half_type = signed_type_for (half_type);
1272
1273 tree half_vectype;
1274 if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1275 type_out, &half_vectype, subtype))
1276 {
1277 /* We can emulate a mixed-sign dot-product using a sequence of
1278 signed dot-products; see vect_emulate_mixed_dot_prod for details. */
1279 if (subtype != optab_vector_mixed_sign
1280 || !vect_supportable_direct_optab_p (vinfo, signed_type_for (type),
1281 DOT_PROD_EXPR, half_type,
1282 type_out, &half_vectype,
1283 optab_vector))
1284 return NULL;
1285
1286 *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
1287 *type_out);
1288 }
1289
1290 /* Get the inputs in the appropriate types. */
1291 tree mult_oprnd[2];
1292 vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1293 unprom0, half_vectype, subtype);
1294
1295 var = vect_recog_temp_ssa_var (type, NULL);
1296 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1297 mult_oprnd[0], mult_oprnd[1], oprnd1);
1298
1299 return pattern_stmt;
1300 }
1301
1302
1303 /* Function vect_recog_sad_pattern
1304
1305 Try to find the following Sum of Absolute Difference (SAD) pattern:
1306
1307 type x_t, y_t;
1308 signed TYPE1 diff, abs_diff;
1309 TYPE2 sum = init;
1310 loop:
1311 sum_0 = phi <init, sum_1>
1312 S1 x_t = ...
1313 S2 y_t = ...
1314 S3 x_T = (TYPE1) x_t;
1315 S4 y_T = (TYPE1) y_t;
1316 S5 diff = x_T - y_T;
1317 S6 abs_diff = ABS_EXPR <diff>;
1318 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1319 S8 sum_1 = abs_diff + sum_0;
1320
1321 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1322 same size as 'TYPE1' or bigger. This is a special case of a reduction
1323 computation.
1324
1325 Input:
1326
1327 * STMT_VINFO: The stmt from which the pattern search begins. In the
1328 example, when this function is called with S8, the pattern
1329 {S3,S4,S5,S6,S7,S8} will be detected.
1330
1331 Output:
1332
1333 * TYPE_OUT: The type of the output of this pattern.
1334
1335 * Return value: A new stmt that will be used to replace the sequence of
1336 stmts that constitute the pattern. In this case it will be:
1337 SAD_EXPR <x_t, y_t, sum_0>
1338 */
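/* An illustrative example, not in the original source (the function name is
   made up): the usual scalar form of a sum of absolute differences,
   assuming unsigned char pixels and an int accumulator.

     int
     sad (unsigned char *x, unsigned char *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; ++i)
	 sum += __builtin_abs (x[i] - y[i]);	// S5, S6 and S8 above
       return sum;
     }

   With a SAD_EXPR optab the whole body reduces to one SAD statement per
   vector iteration.  */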
1339
1340 static gimple *
1341 vect_recog_sad_pattern (vec_info *vinfo,
1342 stmt_vec_info stmt_vinfo, tree *type_out)
1343 {
1344 gimple *last_stmt = stmt_vinfo->stmt;
1345 tree half_type;
1346
1347 /* Look for the following pattern
1348 DX = (TYPE1) X;
1349 DY = (TYPE1) Y;
1350 DDIFF = DX - DY;
1351 DAD = ABS_EXPR <DDIFF>;
1352 [DAD = (TYPE2) DAD; #optional]
1353 sum_1 = DAD + sum_0;
1354 In which
1355 - DX is at least double the size of X
1356 - DY is at least double the size of Y
1357 - DX, DY, DDIFF, DAD all have the same type
1358 - sum is the same size as DAD or bigger
1359 - sum has been recognized as a reduction variable.
1360
1361 This is equivalent to:
1362 DDIFF = X w- Y; #widen sub
1363 DAD = ABS_EXPR <DDIFF>;
1364 sum_1 = DAD w+ sum_0; #widen summation
1365 or
1366 DDIFF = X w- Y; #widen sub
1367 DAD = ABS_EXPR <DDIFF>;
1368 sum_1 = DAD + sum_0; #summation
1369 */
1370
1371 /* Starting from LAST_STMT, follow the defs of its uses in search
1372 of the above pattern. */
1373
1374 tree plus_oprnd0, plus_oprnd1;
1375 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1376 &plus_oprnd0, &plus_oprnd1))
1377 return NULL;
1378
1379 tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1380
1381 /* Any non-truncating sequence of conversions is OK here, since
1382 with a successful match, the result of the ABS(U) is known to fit
1383 within the nonnegative range of the result type. (It cannot be the
1384 negative of the minimum signed value due to the range of the widening
1385 MINUS_EXPR.) */
1386 vect_unpromoted_value unprom_abs;
1387 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1388 &unprom_abs);
1389
1390 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1391 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1392 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1393 Then check that plus_oprnd0 is defined by an abs_expr. */
1394
1395 if (!plus_oprnd0)
1396 return NULL;
1397
1398 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1399 if (!abs_stmt_vinfo)
1400 return NULL;
1401
1402 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1403 inside the loop (in case we are analyzing an outer-loop). */
1404 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1405 vect_unpromoted_value unprom[2];
1406
1407 if (!abs_stmt)
1408 {
1409 gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
1410 if (!abd_stmt
1411 || !gimple_call_internal_p (abd_stmt)
1412 || gimple_call_num_args (abd_stmt) != 2)
1413 return NULL;
1414
1415 tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
1416 tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
1417
1418 if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
1419 {
1420 if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
1421 &unprom[0])
1422 || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
1423 &unprom[1]))
1424 return NULL;
1425 }
1426 else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
1427 {
1428 unprom[0].op = abd_oprnd0;
1429 unprom[0].type = TREE_TYPE (abd_oprnd0);
1430 unprom[1].op = abd_oprnd1;
1431 unprom[1].type = TREE_TYPE (abd_oprnd1);
1432 }
1433 else
1434 return NULL;
1435
1436 half_type = unprom[0].type;
1437 }
1438 else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
1439 unprom, NULL))
1440 return NULL;
1441
1442 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1443
1444 tree half_vectype;
1445 if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1446 type_out, &half_vectype))
1447 return NULL;
1448
1449 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1450 tree sad_oprnd[2];
1451 vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1452 unprom, half_vectype);
1453
1454 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1455 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1456 sad_oprnd[1], plus_oprnd1);
1457
1458 return pattern_stmt;
1459 }
1460
1461 /* Function vect_recog_abd_pattern
1462
1463 Try to find the following ABsolute Difference (ABD) or
1464 widening ABD (WIDEN_ABD) pattern:
1465
1466 TYPE1 x;
1467 TYPE2 y;
1468 TYPE3 x_cast = (TYPE3) x; // widening or no-op
1469 TYPE3 y_cast = (TYPE3) y; // widening or no-op
1470 TYPE3 diff = x_cast - y_cast;
1471 TYPE4 diff_cast = (TYPE4) diff; // widening or no-op
1472 TYPE5 abs = ABS(U)_EXPR <diff_cast>;
1473
1474 WIDEN_ABD exists to optimize the case where TYPE4 is at least
1475 twice as wide as TYPE3.
1476
1477 Input:
1478
1479 * STMT_VINFO: The stmt from which the pattern search begins
1480
1481 Output:
1482
1483 * TYPE_OUT: The type of the output of this pattern
1484
1485 * Return value: A new stmt that will be used to replace the sequence of
1486 stmts that constitute the pattern, principally:
1487 out = IFN_ABD (x, y)
1488 out = IFN_WIDEN_ABD (x, y)
1489 */
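/* An illustrative example, not in the original source (the function name is
   made up): a scalar loop matching the ABD shape above, assuming unsigned
   char inputs whose absolute difference is stored back as unsigned char.

     void
     abd (unsigned char *restrict out, unsigned char *x, unsigned char *y,
	  int n)
     {
       for (int i = 0; i < n; ++i)
	 out[i] = __builtin_abs (x[i] - y[i]);	// abs of a widened subtract
     }

   Because the result is truncated back to unsigned char, IFN_ABD suffices;
   keeping the result in a sufficiently wide type instead allows the
   IFN_VEC_WIDEN_ABD form.  */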
1490
1491 static gimple *
1492 vect_recog_abd_pattern (vec_info *vinfo,
1493 stmt_vec_info stmt_vinfo, tree *type_out)
1494 {
1495 gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1496 if (!last_stmt)
1497 return NULL;
1498
1499 tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1500
1501 vect_unpromoted_value unprom[2];
1502 gassign *diff_stmt;
1503 tree half_type;
1504 if (!vect_recog_absolute_difference (vinfo, last_stmt, &half_type,
1505 unprom, &diff_stmt))
1506 return NULL;
1507
1508 tree abd_in_type, abd_out_type;
1509
1510 if (half_type)
1511 {
1512 abd_in_type = half_type;
1513 abd_out_type = abd_in_type;
1514 }
1515 else
1516 {
1517 unprom[0].op = gimple_assign_rhs1 (diff_stmt);
1518 unprom[1].op = gimple_assign_rhs2 (diff_stmt);
1519 abd_in_type = signed_type_for (out_type);
1520 abd_out_type = abd_in_type;
1521 }
1522
1523 tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
1524 if (!vectype_in)
1525 return NULL;
1526
1527 internal_fn ifn = IFN_ABD;
1528 tree vectype_out = vectype_in;
1529
1530 if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
1531 && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
1532 {
1533 tree mid_type
1534 = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
1535 TYPE_UNSIGNED (abd_in_type));
1536 tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
1537
1538 code_helper dummy_code;
1539 int dummy_int;
1540 auto_vec<tree> dummy_vec;
1541 if (mid_vectype
1542 && supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD,
1543 stmt_vinfo, mid_vectype,
1544 vectype_in,
1545 &dummy_code, &dummy_code,
1546 &dummy_int, &dummy_vec))
1547 {
1548 ifn = IFN_VEC_WIDEN_ABD;
1549 abd_out_type = mid_type;
1550 vectype_out = mid_vectype;
1551 }
1552 }
1553
1554 if (ifn == IFN_ABD
1555 && !direct_internal_fn_supported_p (ifn, vectype_in,
1556 OPTIMIZE_FOR_SPEED))
1557 return NULL;
1558
1559 vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
1560
1561 tree abd_oprnds[2];
1562 vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
1563 abd_in_type, unprom, vectype_in);
1564
1565 *type_out = get_vectype_for_scalar_type (vinfo, out_type);
1566
1567 tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
1568 gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
1569 abd_oprnds[0], abd_oprnds[1]);
1570 gimple_call_set_lhs (abd_stmt, abd_result);
1571 gimple_set_location (abd_stmt, gimple_location (last_stmt));
1572
1573 gimple *stmt = abd_stmt;
1574 if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
1575 && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
1576 && !TYPE_UNSIGNED (abd_out_type))
1577 {
1578 tree unsign = unsigned_type_for (abd_out_type);
1579 tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign);
1580 stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt,
1581 unsign_vectype);
1582 }
1583
1584 return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
1585 }
1586
1587 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1588 so that it can be treated as though it had the form:
1589
1590 A_TYPE a;
1591 B_TYPE b;
1592 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1593 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1594 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1595 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1596 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1597
1598 Try to replace the pattern with:
1599
1600 A_TYPE a;
1601 B_TYPE b;
1602 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1603 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1604 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1605 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1606
1607 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1608
1609 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1610 name of the pattern being matched, for dump purposes. */
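/* An illustrative example, not in the original source (the function name is
   made up): a widening multiply in scalar form, assuming short inputs and
   an int result.

     void
     widen_mult (int *restrict out, short *x, short *y, int n)
     {
       for (int i = 0; i < n; ++i)
	 out[i] = x[i] * y[i];	// (int) x[i] * (int) y[i] in GIMPLE
     }

   Recognizing this as WIDEN_MULT_EXPR lets a vector of N shorts be
   multiplied into two vectors of N/2 ints directly, instead of unpacking
   both inputs first.  */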
1611
1612 static gimple *
1613 vect_recog_widen_op_pattern (vec_info *vinfo,
1614 stmt_vec_info last_stmt_info, tree *type_out,
1615 tree_code orig_code, code_helper wide_code,
1616 bool shift_p, const char *name)
1617 {
1618 gimple *last_stmt = last_stmt_info->stmt;
1619
1620 vect_unpromoted_value unprom[2];
1621 tree half_type;
1622 if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1623 shift_p, 2, unprom, &half_type))
1624
1625 return NULL;
1626
1627 /* Pattern detected. */
1628 vect_pattern_detected (name, last_stmt);
1629
1630 tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1631 tree itype = type;
1632 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1633 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1634 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1635 TYPE_UNSIGNED (half_type));
1636
1637 /* Check target support */
1638 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1639 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1640 tree ctype = itype;
1641 tree vecctype = vecitype;
1642 if (orig_code == MINUS_EXPR
1643 && TYPE_UNSIGNED (itype)
1644 && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1645 {
1646 /* Subtraction is special, even if half_type is unsigned and no matter
1647 whether type is signed or unsigned, if type is wider than itype,
1648 we need to sign-extend from the widening operation result to the
1649 result type.
1650 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1651 itype unsigned short and type either int or unsigned int.
1652 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1653 (unsigned short) 0xffff, but for type int we want the result -1
1654 and for type unsigned int 0xffffffff rather than 0xffff. */
1655 ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1656 vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1657 }
1658
1659 code_helper dummy_code;
1660 int dummy_int;
1661 auto_vec<tree> dummy_vec;
1662 if (!vectype
1663 || !vecitype
1664 || !vecctype
1665 || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1666 vecitype, vectype,
1667 &dummy_code, &dummy_code,
1668 &dummy_int, &dummy_vec))
1669 return NULL;
1670
1671 *type_out = get_vectype_for_scalar_type (vinfo, type);
1672 if (!*type_out)
1673 return NULL;
1674
1675 tree oprnd[2];
1676 vect_convert_inputs (vinfo, last_stmt_info,
1677 2, oprnd, half_type, unprom, vectype);
1678
1679 tree var = vect_recog_temp_ssa_var (itype, NULL);
1680 gimple *pattern_stmt = vect_gimple_build (var, wide_code, oprnd[0], oprnd[1]);
1681
1682 if (vecctype != vecitype)
1683 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
1684 pattern_stmt, vecitype);
1685
1686 return vect_convert_output (vinfo, last_stmt_info,
1687 type, pattern_stmt, vecctype);
1688 }
1689
1690 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1691 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1692
1693 static gimple *
1694 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1695 tree *type_out)
1696 {
1697 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1698 MULT_EXPR, WIDEN_MULT_EXPR, false,
1699 "vect_recog_widen_mult_pattern");
1700 }
1701
1702 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1703 to IFN_VEC_WIDEN_PLUS. See vect_recog_widen_op_pattern for details. */
1704
1705 static gimple *
1706 vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1707 tree *type_out)
1708 {
1709 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1710 PLUS_EXPR, IFN_VEC_WIDEN_PLUS,
1711 false, "vect_recog_widen_plus_pattern");
1712 }
1713
1714 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1715 to IFN_VEC_WIDEN_MINUS. See vect_recog_widen_op_pattern for details. */
1716 static gimple *
1717 vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1718 tree *type_out)
1719 {
1720 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1721 MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
1722 false, "vect_recog_widen_minus_pattern");
1723 }
1724
1725 /* Try to detect abd on widened inputs, converting IFN_ABD
1726 to IFN_VEC_WIDEN_ABD. */
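
/* Editor's sketch (illustration only; names invented): a scalar absolute
   difference whose result is immediately widened, which is the shape this
   pattern converts to IFN_VEC_WIDEN_ABD.  The absolute difference itself is
   assumed to have been recognized as IFN_ABD by the earlier pattern.  */
static inline unsigned short
widen_abd_example (unsigned char a, unsigned char b)
{
  unsigned char abd = a > b ? a - b : b - a;   /* IFN_ABD (a, b)  */
  return (unsigned short) abd;                 /* widening conversion  */
}
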
1727 static gimple *
1728 vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1729 tree *type_out)
1730 {
1731 gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1732 if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
1733 return NULL;
1734
1735 tree last_rhs = gimple_assign_rhs1 (last_stmt);
1736
1737 tree in_type = TREE_TYPE (last_rhs);
1738 tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1739 if (!INTEGRAL_TYPE_P (in_type)
1740 || !INTEGRAL_TYPE_P (out_type)
1741 || TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
1742 || !TYPE_UNSIGNED (in_type))
1743 return NULL;
1744
1745 vect_unpromoted_value unprom;
1746 tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
1747 if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
1748 return NULL;
1749
1750 stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
1751 if (!abd_pattern_vinfo)
1752 return NULL;
1753
1754 abd_pattern_vinfo = vect_stmt_to_vectorize (abd_pattern_vinfo);
1755 gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
1756 if (!abd_stmt
1757 || !gimple_call_internal_p (abd_stmt)
1758 || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
1759 return NULL;
1760
1761 tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
1762 tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
1763
1764 code_helper dummy_code;
1765 int dummy_int;
1766 auto_vec<tree> dummy_vec;
1767 if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
1768 vectype_out, vectype_in,
1769 &dummy_code, &dummy_code,
1770 &dummy_int, &dummy_vec))
1771 return NULL;
1772
1773 vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
1774
1775 *type_out = vectype_out;
1776
1777 tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
1778 tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
1779 tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
1780 gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
1781 abd_oprnd0, abd_oprnd1);
1782 gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
1783 gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
1784 return widen_abd_stmt;
1785 }
1786
1787 /* Function vect_recog_ctz_ffs_pattern
1788
1789 Try to find the following pattern:
1790
1791 TYPE1 A;
1792 TYPE1 B;
1793
1794 B = __builtin_ctz{,l,ll} (A);
1795
1796 or
1797
1798 B = __builtin_ffs{,l,ll} (A);
1799
1800 Input:
1801
1802 * STMT_VINFO: The stmt from which the pattern search begins.
1803 here it starts with B = __builtin_* (A);
1804
1805 Output:
1806
1807 * TYPE_OUT: The vector type of the output of this pattern.
1808
1809 * Return value: A new stmt that will be used to replace the sequence of
1810 stmts that constitute the pattern, using clz or popcount builtins. */
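
/* Editor's sketch (standalone illustration, assuming a 32-bit unsigned int;
   names invented): the scalar identities this pattern relies on when the
   target only provides a vector popcount.  */
static inline int
ctz_via_popcount_example (unsigned int x)
{
  /* .CTZ (X) = .POPCOUNT ((X - 1) & ~X); also yields 32 for X == 0.  */
  return __builtin_popcount ((x - 1) & ~x);
}

static inline int
ffs_via_popcount_example (unsigned int x)
{
  /* .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X) for X != 0; the X == 0 case
     needs a separate compare, as in the code below.  */
  return x ? 33 - __builtin_popcount (x | -x) : 0;
}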
1811
1812 static gimple *
1813 vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1814 tree *type_out)
1815 {
1816 gimple *call_stmt = stmt_vinfo->stmt;
1817 gimple *pattern_stmt;
1818 tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type;
1819 tree new_var;
1820 internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST;
1821 bool defined_at_zero = true, defined_at_zero_new = false;
1822 int val = 0, val_new = 0, val_cmp = 0;
1823 int prec;
1824 int sub = 0, add = 0;
1825 location_t loc;
1826
1827 if (!is_gimple_call (call_stmt))
1828 return NULL;
1829
1830 if (gimple_call_num_args (call_stmt) != 1
1831 && gimple_call_num_args (call_stmt) != 2)
1832 return NULL;
1833
1834 rhs_oprnd = gimple_call_arg (call_stmt, 0);
1835 rhs_type = TREE_TYPE (rhs_oprnd);
1836 lhs_oprnd = gimple_call_lhs (call_stmt);
1837 if (!lhs_oprnd)
1838 return NULL;
1839 lhs_type = TREE_TYPE (lhs_oprnd);
1840 if (!INTEGRAL_TYPE_P (lhs_type)
1841 || !INTEGRAL_TYPE_P (rhs_type)
1842 || !type_has_mode_precision_p (rhs_type)
1843 || TREE_CODE (rhs_oprnd) != SSA_NAME)
1844 return NULL;
1845
1846 switch (gimple_call_combined_fn (call_stmt))
1847 {
1848 CASE_CFN_CTZ:
1849 ifn = IFN_CTZ;
1850 if (!gimple_call_internal_p (call_stmt)
1851 || gimple_call_num_args (call_stmt) != 2)
1852 defined_at_zero = false;
1853 else
1854 val = tree_to_shwi (gimple_call_arg (call_stmt, 1));
1855 break;
1856 CASE_CFN_FFS:
1857 ifn = IFN_FFS;
1858 break;
1859 default:
1860 return NULL;
1861 }
1862
1863 prec = TYPE_PRECISION (rhs_type);
1864 loc = gimple_location (call_stmt);
1865
1866 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1867 if (!vec_type)
1868 return NULL;
1869
1870 vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1871 if (!vec_rhs_type)
1872 return NULL;
1873
1874 /* Do it only if the backend doesn't have ctz<vector_mode>2 or
1875 ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
1876 popcount<vector_mode>2. */
1877 if (!vec_type
1878 || direct_internal_fn_supported_p (ifn, vec_rhs_type,
1879 OPTIMIZE_FOR_SPEED))
1880 return NULL;
1881
1882 if (ifn == IFN_FFS
1883 && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type,
1884 OPTIMIZE_FOR_SPEED))
1885 {
1886 ifnnew = IFN_CTZ;
1887 defined_at_zero_new
1888 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1889 val_new) == 2;
1890 }
1891 else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type,
1892 OPTIMIZE_FOR_SPEED))
1893 {
1894 ifnnew = IFN_CLZ;
1895 defined_at_zero_new
1896 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1897 val_new) == 2;
1898 }
1899 if ((ifnnew == IFN_LAST
1900 || (defined_at_zero && !defined_at_zero_new))
1901 && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type,
1902 OPTIMIZE_FOR_SPEED))
1903 {
1904 ifnnew = IFN_POPCOUNT;
1905 defined_at_zero_new = true;
1906 val_new = prec;
1907 }
1908 if (ifnnew == IFN_LAST)
1909 return NULL;
1910
1911 vect_pattern_detected ("vec_recog_ctz_ffs_pattern", call_stmt);
1912
1913 val_cmp = val_new;
1914 if ((ifnnew == IFN_CLZ
1915 && defined_at_zero
1916 && defined_at_zero_new
1917 && val == prec
1918 && val_new == prec)
1919 || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ))
1920 {
1921 /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
1922 .CTZ (X) = .POPCOUNT ((X - 1) & ~X). */
1923 if (ifnnew == IFN_CLZ)
1924 sub = prec;
1925 val_cmp = prec;
1926
1927 if (!TYPE_UNSIGNED (rhs_type))
1928 {
1929 rhs_type = unsigned_type_for (rhs_type);
1930 vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1931 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1932 pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd);
1933 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
1934 vec_rhs_type);
1935 rhs_oprnd = new_var;
1936 }
1937
1938 tree m1 = vect_recog_temp_ssa_var (rhs_type, NULL);
1939 pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd,
1940 build_int_cst (rhs_type, -1));
1941 gimple_set_location (pattern_stmt, loc);
1942 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1943
1944 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1945 pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd);
1946 gimple_set_location (pattern_stmt, loc);
1947 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1948 rhs_oprnd = new_var;
1949
1950 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1951 pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1952 m1, rhs_oprnd);
1953 gimple_set_location (pattern_stmt, loc);
1954 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1955 rhs_oprnd = new_var;
1956 }
1957 else if (ifnnew == IFN_CLZ)
1958 {
1959 /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
1960 .FFS (X) = PREC - .CLZ (X & -X). */
1961 sub = prec - (ifn == IFN_CTZ);
1962 val_cmp = sub - val_new;
1963
1964 tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
1965 pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
1966 gimple_set_location (pattern_stmt, loc);
1967 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1968
1969 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1970 pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1971 rhs_oprnd, neg);
1972 gimple_set_location (pattern_stmt, loc);
1973 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1974 rhs_oprnd = new_var;
1975 }
1976 else if (ifnnew == IFN_POPCOUNT)
1977 {
1978 /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
1979 .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X). */
1980 sub = prec + (ifn == IFN_FFS);
1981 val_cmp = sub;
1982
1983 tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
1984 pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
1985 gimple_set_location (pattern_stmt, loc);
1986 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1987
1988 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1989 pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR,
1990 rhs_oprnd, neg);
1991 gimple_set_location (pattern_stmt, loc);
1992 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1993 rhs_oprnd = new_var;
1994 }
1995 else if (ifnnew == IFN_CTZ)
1996 {
1997 /* .FFS (X) = .CTZ (X) + 1. */
1998 add = 1;
1999 val_cmp++;
2000 }
2001
2002 /* Create B = .IFNNEW (A). */
2003 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2004 if ((ifnnew == IFN_CLZ || ifnnew == IFN_CTZ) && defined_at_zero_new)
2005 pattern_stmt
2006 = gimple_build_call_internal (ifnnew, 2, rhs_oprnd,
2007 build_int_cst (integer_type_node,
2008 val_new));
2009 else
2010 pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
2011 gimple_call_set_lhs (pattern_stmt, new_var);
2012 gimple_set_location (pattern_stmt, loc);
2013 *type_out = vec_type;
2014
2015 if (sub)
2016 {
2017 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2018 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2019 pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR,
2020 build_int_cst (lhs_type, sub),
2021 new_var);
2022 gimple_set_location (pattern_stmt, loc);
2023 new_var = ret_var;
2024 }
2025 else if (add)
2026 {
2027 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2028 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2029 pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2030 build_int_cst (lhs_type, add));
2031 gimple_set_location (pattern_stmt, loc);
2032 new_var = ret_var;
2033 }
2034
2035 if (defined_at_zero
2036 && (!defined_at_zero_new || val != val_cmp))
2037 {
2038 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2039 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2040 rhs_oprnd = gimple_call_arg (call_stmt, 0);
2041 rhs_type = TREE_TYPE (rhs_oprnd);
2042 tree cmp = build2_loc (loc, NE_EXPR, boolean_type_node,
2043 rhs_oprnd, build_zero_cst (rhs_type));
2044 pattern_stmt = gimple_build_assign (ret_var, COND_EXPR, cmp,
2045 new_var,
2046 build_int_cst (lhs_type, val));
2047 }
2048
2049 if (dump_enabled_p ())
2050 dump_printf_loc (MSG_NOTE, vect_location,
2051 "created pattern stmt: %G", pattern_stmt);
2052
2053 return pattern_stmt;
2054 }
2055
2056 /* Function vect_recog_popcount_clz_ctz_ffs_pattern
2057
2058 Try to find the following pattern:
2059
2060 UTYPE1 A;
2061 TYPE1 B;
2062 UTYPE2 temp_in;
2063 TYPE3 temp_out;
2064 temp_in = (UTYPE2)A;
2065
2066 temp_out = __builtin_popcount{,l,ll} (temp_in);
2067 B = (TYPE1) temp_out;
2068
2069 TYPE2 may or may not be equal to TYPE3.
2070 e.g. TYPE2 is equal to TYPE3 for __builtin_popcount,
2071 but TYPE2 is not equal to TYPE3 for __builtin_popcountll.
2072
2073 Input:
2074
2075 * STMT_VINFO: The stmt from which the pattern search begins.
2076 here it starts with B = (TYPE1) temp_out;
2077
2078 Output:
2079
2080 * TYPE_OUT: The vector type of the output of this pattern.
2081
2082 * Return value: A new stmt that will be used to replace the sequence of
2083 stmts that constitute the pattern. In this case it will be:
2084 B = .POPCOUNT (A);
2085
2086 Similarly for clz, ctz and ffs.
2087 */
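
/* Editor's sketch (illustration only; names invented): the scalar shape this
   pattern matches, here a popcount whose 8-bit input is widened for the call
   and whose result is truncated back to 8 bits; it is replaced by .POPCOUNT
   on 8-bit elements.  */
static inline unsigned char
popcount_shape_example (unsigned char a)
{
  unsigned int temp_in = a;                      /* temp_in = (UTYPE2) A  */
  int temp_out = __builtin_popcount (temp_in);   /* builtin popcount      */
  return (unsigned char) temp_out;               /* B = (TYPE1) temp_out  */
}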
2088
2089 static gimple *
2090 vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
2091 stmt_vec_info stmt_vinfo,
2092 tree *type_out)
2093 {
2094 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
2095 gimple *call_stmt, *pattern_stmt;
2096 tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
2097 internal_fn ifn = IFN_LAST;
2098 int addend = 0;
2099
2100 /* Find B = (TYPE1) temp_out. */
2101 if (!last_stmt)
2102 return NULL;
2103 tree_code code = gimple_assign_rhs_code (last_stmt);
2104 if (!CONVERT_EXPR_CODE_P (code))
2105 return NULL;
2106
2107 lhs_oprnd = gimple_assign_lhs (last_stmt);
2108 lhs_type = TREE_TYPE (lhs_oprnd);
2109 if (!INTEGRAL_TYPE_P (lhs_type))
2110 return NULL;
2111
2112 rhs_oprnd = gimple_assign_rhs1 (last_stmt);
2113 if (TREE_CODE (rhs_oprnd) != SSA_NAME
2114 || !has_single_use (rhs_oprnd))
2115 return NULL;
2116 call_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
2117
2118 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
2119 if (!is_gimple_call (call_stmt))
2120 return NULL;
2121 switch (gimple_call_combined_fn (call_stmt))
2122 {
2123 int val;
2124 CASE_CFN_POPCOUNT:
2125 ifn = IFN_POPCOUNT;
2126 break;
2127 CASE_CFN_CLZ:
2128 ifn = IFN_CLZ;
2129 /* Punt if call result is unsigned and defined value at zero
2130 is negative, as the negative value doesn't extend correctly. */
2131 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2132 && gimple_call_internal_p (call_stmt)
2133 && CLZ_DEFINED_VALUE_AT_ZERO
2134 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2135 && val < 0)
2136 return NULL;
2137 break;
2138 CASE_CFN_CTZ:
2139 ifn = IFN_CTZ;
2140 /* Punt if call result is unsigned and defined value at zero
2141 is negative, as the negative value doesn't extend correctly. */
2142 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2143 && gimple_call_internal_p (call_stmt)
2144 && CTZ_DEFINED_VALUE_AT_ZERO
2145 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2146 && val < 0)
2147 return NULL;
2148 break;
2149 CASE_CFN_FFS:
2150 ifn = IFN_FFS;
2151 break;
2152 default:
2153 return NULL;
2154 }
2155
2156 if (gimple_call_num_args (call_stmt) != 1
2157 && gimple_call_num_args (call_stmt) != 2)
2158 return NULL;
2159
2160 rhs_oprnd = gimple_call_arg (call_stmt, 0);
2161 vect_unpromoted_value unprom_diff;
2162 rhs_origin
2163 = vect_look_through_possible_promotion (vinfo, rhs_oprnd, &unprom_diff);
2164
2165 if (!rhs_origin)
2166 return NULL;
2167
2168 /* Input and output of .POPCOUNT should be same-precision integer. */
2169 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type))
2170 return NULL;
2171
2172 /* Also A should be unsigned or same precision as temp_in, otherwise
2173 different builtins/internal functions have different behaviors. */
2174 if (TYPE_PRECISION (unprom_diff.type)
2175 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))
2176 switch (ifn)
2177 {
2178 case IFN_POPCOUNT:
2179 /* For popcount require zero extension, which doesn't add any
2180 further bits to the count. */
2181 if (!TYPE_UNSIGNED (unprom_diff.type))
2182 return NULL;
2183 break;
2184 case IFN_CLZ:
2185 /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
2186 if it is undefined at zero or if it matches also for the
2187 defined value there. */
2188 if (!TYPE_UNSIGNED (unprom_diff.type))
2189 return NULL;
2190 if (!type_has_mode_precision_p (lhs_type)
2191 || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd)))
2192 return NULL;
2193 addend = (TYPE_PRECISION (TREE_TYPE (rhs_oprnd))
2194 - TYPE_PRECISION (lhs_type));
2195 if (gimple_call_internal_p (call_stmt)
2196 && gimple_call_num_args (call_stmt) == 2)
2197 {
2198 int val1, val2;
2199 val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
2200 int d2
2201 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2202 val2);
2203 if (d2 != 2 || val1 != val2 + addend)
2204 return NULL;
2205 }
2206 break;
2207 case IFN_CTZ:
2208 /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
2209 if it is undefined at zero or if it matches also for the
2210 defined value there. */
2211 if (gimple_call_internal_p (call_stmt)
2212 && gimple_call_num_args (call_stmt) == 2)
2213 {
2214 int val1, val2;
2215 val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
2216 int d2
2217 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2218 val2);
2219 if (d2 != 2 || val1 != val2)
2220 return NULL;
2221 }
2222 break;
2223 case IFN_FFS:
2224 /* ffsll (x) == ffs (x) for unsigned or signed x. */
2225 break;
2226 default:
2227 gcc_unreachable ();
2228 }
2229
2230 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
2231 /* Do it only if the backend has popcount<vector_mode>2 etc. pattern. */
2232 if (!vec_type)
2233 return NULL;
2234
2235 bool supported
2236 = direct_internal_fn_supported_p (ifn, vec_type, OPTIMIZE_FOR_SPEED);
2237 if (!supported)
2238 switch (ifn)
2239 {
2240 case IFN_POPCOUNT:
2241 case IFN_CLZ:
2242 return NULL;
2243 case IFN_FFS:
2244 /* vect_recog_ctz_ffs_pattern can implement ffs using ctz. */
2245 if (direct_internal_fn_supported_p (IFN_CTZ, vec_type,
2246 OPTIMIZE_FOR_SPEED))
2247 break;
2248 /* FALLTHRU */
2249 case IFN_CTZ:
2250 /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
2251 clz or popcount. */
2252 if (direct_internal_fn_supported_p (IFN_CLZ, vec_type,
2253 OPTIMIZE_FOR_SPEED))
2254 break;
2255 if (direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
2256 OPTIMIZE_FOR_SPEED))
2257 break;
2258 return NULL;
2259 default:
2260 gcc_unreachable ();
2261 }
2262
2263 vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern",
2264 call_stmt);
2265
2266 /* Create B = .POPCOUNT (A). */
2267 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2268 tree arg2 = NULL_TREE;
2269 int val;
2270 if (ifn == IFN_CLZ
2271 && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2272 val) == 2)
2273 arg2 = build_int_cst (integer_type_node, val);
2274 else if (ifn == IFN_CTZ
2275 && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2276 val) == 2)
2277 arg2 = build_int_cst (integer_type_node, val);
2278 if (arg2)
2279 pattern_stmt = gimple_build_call_internal (ifn, 2, unprom_diff.op, arg2);
2280 else
2281 pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op);
2282 gimple_call_set_lhs (pattern_stmt, new_var);
2283 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2284 *type_out = vec_type;
2285
2286 if (dump_enabled_p ())
2287 dump_printf_loc (MSG_NOTE, vect_location,
2288 "created pattern stmt: %G", pattern_stmt);
2289
2290 if (addend)
2291 {
2292 gcc_assert (supported);
2293 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2294 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2295 pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2296 build_int_cst (lhs_type, addend));
2297 }
2298 else if (!supported)
2299 {
2300 stmt_vec_info new_stmt_info = vinfo->add_stmt (pattern_stmt);
2301 STMT_VINFO_VECTYPE (new_stmt_info) = vec_type;
2302 pattern_stmt
2303 = vect_recog_ctz_ffs_pattern (vinfo, new_stmt_info, type_out);
2304 if (pattern_stmt == NULL)
2305 return NULL;
2306 if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info))
2307 {
2308 gimple_seq *pseq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
2309 gimple_seq_add_seq_without_update (pseq, seq);
2310 }
2311 }
2312 return pattern_stmt;
2313 }
2314
2315 /* Function vect_recog_pow_pattern
2316
2317 Try to find the following pattern:
2318
2319 x = POW (y, N);
2320
2321 with POW being one of pow, powf, powi, powif and N being
2322 either 2 or 0.5.
2323
2324 Input:
2325
2326 * STMT_VINFO: The stmt from which the pattern search begins.
2327
2328 Output:
2329
2330 * TYPE_OUT: The type of the output of this pattern.
2331
2332 * Return value: A new stmt that will be used to replace the sequence of
2333 stmts that constitute the pattern. In this case it will be:
2334 x = x * x
2335 or
2336 x = sqrt (x)
2337 */
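
/* Editor's sketch (illustration only; names invented): the scalar rewrites
   this pattern performs for the two constant exponents it handles.  */
static inline double
pow_example_square (double x)
{
  return x * x;                /* replaces pow (x, 2.0)  */
}

static inline double
pow_example_sqrt (double x)
{
  return __builtin_sqrt (x);   /* replaces pow (x, 0.5)  */
}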
2338
2339 static gimple *
2340 vect_recog_pow_pattern (vec_info *vinfo,
2341 stmt_vec_info stmt_vinfo, tree *type_out)
2342 {
2343 gimple *last_stmt = stmt_vinfo->stmt;
2344 tree base, exp;
2345 gimple *stmt;
2346 tree var;
2347
2348 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
2349 return NULL;
2350
2351 switch (gimple_call_combined_fn (last_stmt))
2352 {
2353 CASE_CFN_POW:
2354 CASE_CFN_POWI:
2355 break;
2356
2357 default:
2358 return NULL;
2359 }
2360
2361 base = gimple_call_arg (last_stmt, 0);
2362 exp = gimple_call_arg (last_stmt, 1);
2363 if (TREE_CODE (exp) != REAL_CST
2364 && TREE_CODE (exp) != INTEGER_CST)
2365 {
2366 if (flag_unsafe_math_optimizations
2367 && TREE_CODE (base) == REAL_CST
2368 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
2369 {
2370 combined_fn log_cfn;
2371 built_in_function exp_bfn;
2372 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
2373 {
2374 case BUILT_IN_POW:
2375 log_cfn = CFN_BUILT_IN_LOG;
2376 exp_bfn = BUILT_IN_EXP;
2377 break;
2378 case BUILT_IN_POWF:
2379 log_cfn = CFN_BUILT_IN_LOGF;
2380 exp_bfn = BUILT_IN_EXPF;
2381 break;
2382 case BUILT_IN_POWL:
2383 log_cfn = CFN_BUILT_IN_LOGL;
2384 exp_bfn = BUILT_IN_EXPL;
2385 break;
2386 default:
2387 return NULL;
2388 }
2389 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
2390 tree exp_decl = builtin_decl_implicit (exp_bfn);
2391 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
2392 does that, but if C is a power of 2, we want to use
2393 exp2 (log2 (C) * x) in the non-vectorized version, but for
2394 vectorization we don't have vectorized exp2. */
2395 if (logc
2396 && TREE_CODE (logc) == REAL_CST
2397 && exp_decl
2398 && lookup_attribute ("omp declare simd",
2399 DECL_ATTRIBUTES (exp_decl)))
2400 {
2401 cgraph_node *node = cgraph_node::get_create (exp_decl);
2402 if (node->simd_clones == NULL)
2403 {
2404 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
2405 || node->definition)
2406 return NULL;
2407 expand_simd_clones (node);
2408 if (node->simd_clones == NULL)
2409 return NULL;
2410 }
2411 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2412 if (!*type_out)
2413 return NULL;
2414 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2415 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
2416 append_pattern_def_seq (vinfo, stmt_vinfo, g);
2417 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2418 g = gimple_build_call (exp_decl, 1, def);
2419 gimple_call_set_lhs (g, res);
2420 return g;
2421 }
2422 }
2423
2424 return NULL;
2425 }
2426
2427 /* We now have a pow or powi builtin function call with a constant
2428 exponent. */
2429
2430 /* Catch squaring. */
2431 if ((tree_fits_shwi_p (exp)
2432 && tree_to_shwi (exp) == 2)
2433 || (TREE_CODE (exp) == REAL_CST
2434 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
2435 {
2436 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
2437 TREE_TYPE (base), type_out))
2438 return NULL;
2439
2440 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2441 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
2442 return stmt;
2443 }
2444
2445 /* Catch square root. */
2446 if (TREE_CODE (exp) == REAL_CST
2447 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
2448 {
2449 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2450 if (*type_out
2451 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
2452 OPTIMIZE_FOR_SPEED))
2453 {
2454 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
2455 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
2456 gimple_call_set_lhs (stmt, var);
2457 gimple_call_set_nothrow (stmt, true);
2458 return stmt;
2459 }
2460 }
2461
2462 return NULL;
2463 }
2464
2465
2466 /* Function vect_recog_widen_sum_pattern
2467
2468 Try to find the following pattern:
2469
2470 type x_t;
2471 TYPE x_T, sum = init;
2472 loop:
2473 sum_0 = phi <init, sum_1>
2474 S1 x_t = *p;
2475 S2 x_T = (TYPE) x_t;
2476 S3 sum_1 = x_T + sum_0;
2477
2478 where type 'TYPE' is at least double the size of type 'type', i.e - we're
2479 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
2480 a special case of a reduction computation.
2481
2482 Input:
2483
2484 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
2485 when this function is called with S3, the pattern {S2,S3} will be detected.
2486
2487 Output:
2488
2489 * TYPE_OUT: The type of the output of this pattern.
2490
2491 * Return value: A new stmt that will be used to replace the sequence of
2492 stmts that constitute the pattern. In this case it will be:
2493 WIDEN_SUM <x_t, sum_0>
2494
2495 Note: The widening-sum idiom is a widening reduction pattern that is
2496 vectorized without preserving all the intermediate results. It
2497 produces only N/2 (widened) results (by summing up pairs of
2498 intermediate results) rather than all N results. Therefore, we
2499 cannot allow this pattern when we want to get all the results and in
2500 the correct order (as is the case when this computation is in an
2501 inner-loop nested in an outer-loop that is being vectorized). */
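
/* Editor's sketch (illustration only; names invented): a scalar reduction
   loop with the widening-sum shape, accumulating 8-bit elements into a
   32-bit sum.  */
static inline int
widen_sum_example (const unsigned char *p, int n)
{
  int sum = 0;
  for (int i = 0; i < n; i++)
    sum += (int) p[i];         /* S2 + S3: widen, then accumulate.  */
  return sum;
}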
2502
2503 static gimple *
2504 vect_recog_widen_sum_pattern (vec_info *vinfo,
2505 stmt_vec_info stmt_vinfo, tree *type_out)
2506 {
2507 gimple *last_stmt = stmt_vinfo->stmt;
2508 tree oprnd0, oprnd1;
2509 tree type;
2510 gimple *pattern_stmt;
2511 tree var;
2512
2513 /* Look for the following pattern
2514 DX = (TYPE) X;
2515 sum_1 = DX + sum_0;
2516 In which DX is at least double the size of X, and sum_1 has been
2517 recognized as a reduction variable.
2518 */
2519
2520 /* Starting from LAST_STMT, follow the defs of its uses in search
2521 of the above pattern. */
2522
2523 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
2524 &oprnd0, &oprnd1)
2525 || TREE_CODE (oprnd0) != SSA_NAME
2526 || !vinfo->lookup_def (oprnd0))
2527 return NULL;
2528
2529 type = TREE_TYPE (gimple_get_lhs (last_stmt));
2530
2531 /* So far so good. Since last_stmt was detected as a (summation) reduction,
2532 we know that oprnd1 is the reduction variable (defined by a loop-header
2533 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
2534 Left to check that oprnd0 is defined by a cast from type 'type' to type
2535 'TYPE'. */
2536
2537 vect_unpromoted_value unprom0;
2538 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
2539 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
2540 return NULL;
2541
2542 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
2543
2544 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
2545 unprom0.type, type_out))
2546 return NULL;
2547
2548 var = vect_recog_temp_ssa_var (type, NULL);
2549 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
2550
2551 return pattern_stmt;
2552 }
2553
2554 /* Function vect_recog_bitfield_ref_pattern
2555
2556 Try to find the following pattern:
2557
2558 bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
2559 result = (type_out) bf_value;
2560
2561 or
2562
2563 if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2564
2565 where type_out is a non-bitfield type, that is to say, its precision matches
2566 2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
2567
2568 Input:
2569
2570 * STMT_VINFO: The stmt from which the pattern search begins.
2571 here it starts with:
2572 result = (type_out) bf_value;
2573
2574 or
2575
2576 if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2577
2578 Output:
2579
2580 * TYPE_OUT: The vector type of the output of this pattern.
2581
2582 * Return value: A new stmt that will be used to replace the sequence of
2583 stmts that constitute the pattern. If the precision of type_out is bigger
2584 than the precision type of _1 we perform the widening before the shifting,
2585 since the new precision will be large enough to shift the value and moving
2586 widening operations up the statement chain enables the generation of
2587 widening loads. If we are widening and the operation after the pattern is
2588 an addition then we mask first and shift later, to enable the generation of
2589 shifting adds. In the case of narrowing we will always mask first, shift
2590 last and then perform a narrowing operation. This will enable the
2591 generation of narrowing shifts.
2592
2593 Widening with mask first, shift later:
2594 container = (type_out) container;
2595 masked = container & (((1 << bitsize) - 1) << bitpos);
2596 result = masked >> bitpos;
2597
2598 Widening with shift first, mask last:
2599 container = (type_out) container;
2600 shifted = container >> bitpos;
2601 result = shifted & ((1 << bitsize) - 1);
2602
2603 Narrowing:
2604 masked = container & (((1 << bitsize) - 1) << bitpos);
2605 result = masked >> bitpos;
2606 result = (type_out) result;
2607
2608 If the bitfield is signed and it's wider than type_out, we need to
2609 keep the result sign-extended:
2610 container = (type) container;
2611 masked = container << (prec - bitsize - bitpos);
2612 result = (type_out) (masked >> (prec - bitsize));
2613
2614 Here type is the signed variant of the wider of type_out and the type
2615 of container.
2616
2617 The shift is optional and is only needed when bitpos != 0.
2618
2619 When the original bitfield was inside a gcond, a new gcond is also
2620 generated with the new `result` as the operand to the comparison.
2621
2622 */
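
/* Editor's sketch (illustration only, assuming an unsigned 8-bit field at bit
   offset 8 of a 32-bit container; names invented): the shift-first lowering
   described above.  */
static inline unsigned int
bitfield_ref_example (unsigned int container)
{
  unsigned int shifted = container >> 8;   /* shifted = container >> bitpos  */
  return shifted & 0xff;                   /* result = shifted & ((1 << bitsize) - 1)  */
}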
2623
2624 static gimple *
2625 vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2626 tree *type_out)
2627 {
2628 gimple *bf_stmt = NULL;
2629 tree lhs = NULL_TREE;
2630 tree ret_type = NULL_TREE;
2631 gimple *stmt = STMT_VINFO_STMT (stmt_info);
2632 if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
2633 {
2634 tree op = gimple_cond_lhs (cond_stmt);
2635 if (TREE_CODE (op) != SSA_NAME)
2636 return NULL;
2637 bf_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
2638 if (TREE_CODE (gimple_cond_rhs (cond_stmt)) != INTEGER_CST)
2639 return NULL;
2640 }
2641 else if (is_gimple_assign (stmt)
2642 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
2643 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
2644 {
2645 gimple *second_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2646 bf_stmt = dyn_cast <gassign *> (second_stmt);
2647 lhs = gimple_assign_lhs (stmt);
2648 ret_type = TREE_TYPE (lhs);
2649 }
2650
2651 if (!bf_stmt
2652 || gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF)
2653 return NULL;
2654
2655 tree bf_ref = gimple_assign_rhs1 (bf_stmt);
2656 tree container = TREE_OPERAND (bf_ref, 0);
2657 ret_type = ret_type ? ret_type : TREE_TYPE (container);
2658
2659 if (!bit_field_offset (bf_ref).is_constant ()
2660 || !bit_field_size (bf_ref).is_constant ()
2661 || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container))))
2662 return NULL;
2663
2664 if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref))
2665 || !INTEGRAL_TYPE_P (TREE_TYPE (container))
2666 || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode)
2667 return NULL;
2668
2669 gimple *use_stmt, *pattern_stmt;
2670 use_operand_p use_p;
2671 bool shift_first = true;
2672 tree container_type = TREE_TYPE (container);
2673 tree vectype = get_vectype_for_scalar_type (vinfo, container_type);
2674
2675 /* Calculate shift_n before the adjustments for widening loads, otherwise
2676 the container may change and we would have to consider the offset change for
2677 widening loads on big-endian targets. The shift_n calculated here can be
2678 independent of widening. */
2679 unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
2680 unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
2681 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2682 if (BYTES_BIG_ENDIAN)
2683 shift_n = prec - shift_n - mask_width;
2684
2685 bool ref_sext = (!TYPE_UNSIGNED (TREE_TYPE (bf_ref))
2686 && TYPE_PRECISION (ret_type) > mask_width);
2687 bool load_widen = (TYPE_PRECISION (TREE_TYPE (container))
2688 < TYPE_PRECISION (ret_type));
2689
2690 /* We move the conversion earlier if the loaded type is smaller than the
2691 return type to enable the use of widening loads. And if we need a
2692 sign extension, we need to convert the loaded value early to a signed
2693 type as well. */
2694 if (ref_sext || load_widen)
2695 {
2696 tree type = load_widen ? ret_type : container_type;
2697 if (ref_sext)
2698 type = gimple_signed_type (type);
2699 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type),
2700 NOP_EXPR, container);
2701 container = gimple_get_lhs (pattern_stmt);
2702 container_type = TREE_TYPE (container);
2703 prec = tree_to_uhwi (TYPE_SIZE (container_type));
2704 vectype = get_vectype_for_scalar_type (vinfo, container_type);
2705 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2706 }
2707 else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type))
2708 /* If we are doing the conversion last then also delay the shift as we may
2709 be able to combine the shift and conversion in certain cases. */
2710 shift_first = false;
2711
2712 /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
2713 PLUS_EXPR then do the shift last as some targets can combine the shift and
2714 add into a single instruction. */
2715 if (lhs && single_imm_use (lhs, &use_p, &use_stmt))
2716 {
2717 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
2718 && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR)
2719 shift_first = false;
2720 }
2721
2722 /* If we don't have to shift we only generate the mask, so just fix the
2723 code-path to shift_first. */
2724 if (shift_n == 0)
2725 shift_first = true;
2726
2727 tree result;
2728 if (shift_first && !ref_sext)
2729 {
2730 tree shifted = container;
2731 if (shift_n)
2732 {
2733 pattern_stmt
2734 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2735 RSHIFT_EXPR, container,
2736 build_int_cst (sizetype, shift_n));
2737 shifted = gimple_assign_lhs (pattern_stmt);
2738 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2739 }
2740
2741 tree mask = wide_int_to_tree (container_type,
2742 wi::mask (mask_width, false, prec));
2743
2744 pattern_stmt
2745 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2746 BIT_AND_EXPR, shifted, mask);
2747 result = gimple_assign_lhs (pattern_stmt);
2748 }
2749 else
2750 {
2751 tree temp = vect_recog_temp_ssa_var (container_type);
2752 if (!ref_sext)
2753 {
2754 tree mask = wide_int_to_tree (container_type,
2755 wi::shifted_mask (shift_n,
2756 mask_width,
2757 false, prec));
2758 pattern_stmt = gimple_build_assign (temp, BIT_AND_EXPR,
2759 container, mask);
2760 }
2761 else
2762 {
2763 HOST_WIDE_INT shl = prec - shift_n - mask_width;
2764 shift_n += shl;
2765 pattern_stmt = gimple_build_assign (temp, LSHIFT_EXPR,
2766 container,
2767 build_int_cst (sizetype,
2768 shl));
2769 }
2770
2771 tree masked = gimple_assign_lhs (pattern_stmt);
2772 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2773 pattern_stmt
2774 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2775 RSHIFT_EXPR, masked,
2776 build_int_cst (sizetype, shift_n));
2777 result = gimple_assign_lhs (pattern_stmt);
2778 }
2779
2780 if (!useless_type_conversion_p (TREE_TYPE (result), ret_type))
2781 {
2782 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2783 pattern_stmt
2784 = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
2785 NOP_EXPR, result);
2786 }
2787
2788 if (!lhs)
2789 {
2790 if (!vectype)
2791 return NULL;
2792
2793 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2794 vectype = truth_type_for (vectype);
2795
2796 /* FIXME: This part extracts the boolean value out of the bitfield in the
2797 same way as vect_recog_gcond_pattern does. However because
2798 patterns cannot match the same root twice, when we handle and
2799 lower the bitfield in the gcond, vect_recog_gcond_pattern can't
2800 apply anymore. We should really fix it so that we don't need to
2801 duplicate transformations like these. */
2802 tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
2803 gcond *cond_stmt = dyn_cast <gcond *> (stmt_info->stmt);
2804 tree cond_cst = gimple_cond_rhs (cond_stmt);
2805 gimple *new_stmt
2806 = gimple_build_assign (new_lhs, gimple_cond_code (cond_stmt),
2807 gimple_get_lhs (pattern_stmt),
2808 fold_convert (container_type, cond_cst));
2809 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype, container_type);
2810 pattern_stmt
2811 = gimple_build_cond (NE_EXPR, new_lhs,
2812 build_zero_cst (TREE_TYPE (new_lhs)),
2813 NULL_TREE, NULL_TREE);
2814 }
2815
2816 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2817 vect_pattern_detected ("bitfield_ref pattern", stmt_info->stmt);
2818
2819 return pattern_stmt;
2820 }
2821
2822 /* Function vect_recog_bit_insert_pattern
2823
2824 Try to find the following pattern:
2825
2826 written = BIT_INSERT_EXPR (container, value, bitpos);
2827
2828 Input:
2829
2830 * STMT_VINFO: The stmt we want to replace.
2831
2832 Output:
2833
2834 * TYPE_OUT: The vector type of the output of this pattern.
2835
2836 * Return value: A new stmt that will be used to replace the sequence of
2837 stmts that constitute the pattern. In this case it will be:
2838 value = (container_type) value; // Make sure value has the container's type.
2839 shifted = value << bitpos; // Shift value into place
2840 masked = shifted & (mask << bitpos); // Mask off the non-relevant bits in
2841 // the 'to-write value'.
2842 cleared = container & ~(mask << bitpos); // Clear the bits in the
2843 // container that we are about
2844 // to write to.
2845 written = cleared | masked; // Write bits.
2846
2847
2848 where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask to keep the number of
2849 bits corresponding to the real size of the bitfield value we are writing to.
2850 The shift is optional and is only needed when bitpos != 0.
2851
2852 */
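
/* Editor's sketch (illustration only, assuming an 8-bit value written at bit
   offset 8 of a 32-bit container; names invented): the shift/mask/clear/or
   sequence described above.  */
static inline unsigned int
bit_insert_example (unsigned int container, unsigned int value)
{
  unsigned int shifted = value << 8;                  /* shift value into place  */
  unsigned int masked = shifted & (0xffu << 8);       /* keep only the field     */
  unsigned int cleared = container & ~(0xffu << 8);   /* clear the target bits   */
  return cleared | masked;                            /* write the field         */
}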
2853
2854 static gimple *
2855 vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2856 tree *type_out)
2857 {
2858 gassign *bf_stmt = dyn_cast <gassign *> (stmt_info->stmt);
2859 if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_INSERT_EXPR)
2860 return NULL;
2861
2862 tree container = gimple_assign_rhs1 (bf_stmt);
2863 tree value = gimple_assign_rhs2 (bf_stmt);
2864 tree shift = gimple_assign_rhs3 (bf_stmt);
2865
2866 tree bf_type = TREE_TYPE (value);
2867 tree container_type = TREE_TYPE (container);
2868
2869 if (!INTEGRAL_TYPE_P (container_type)
2870 || !tree_fits_uhwi_p (TYPE_SIZE (container_type)))
2871 return NULL;
2872
2873 gimple *pattern_stmt;
2874
2875 vect_unpromoted_value unprom;
2876 unprom.set_op (value, vect_internal_def);
2877 value = vect_convert_input (vinfo, stmt_info, container_type, &unprom,
2878 get_vectype_for_scalar_type (vinfo,
2879 container_type));
2880
2881 unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);
2882 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2883 unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift);
2884 if (BYTES_BIG_ENDIAN)
2885 {
2886 shift_n = prec - shift_n - mask_width;
2887 shift = build_int_cst (TREE_TYPE (shift), shift_n);
2888 }
2889
2890 if (!useless_type_conversion_p (TREE_TYPE (value), container_type))
2891 {
2892 pattern_stmt =
2893 gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2894 NOP_EXPR, value);
2895 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2896 value = gimple_get_lhs (pattern_stmt);
2897 }
2898
2899 /* Shift VALUE into place. */
2900 tree shifted = value;
2901 if (shift_n)
2902 {
2903 gimple_seq stmts = NULL;
2904 shifted
2905 = gimple_build (&stmts, LSHIFT_EXPR, container_type, value, shift);
2906 if (!gimple_seq_empty_p (stmts))
2907 append_pattern_def_seq (vinfo, stmt_info,
2908 gimple_seq_first_stmt (stmts));
2909 }
2910
2911 tree mask_t
2912 = wide_int_to_tree (container_type,
2913 wi::shifted_mask (shift_n, mask_width, false, prec));
2914
2915 /* Clear bits we don't want to write back from SHIFTED. */
2916 gimple_seq stmts = NULL;
2917 tree masked = gimple_build (&stmts, BIT_AND_EXPR, container_type, shifted,
2918 mask_t);
2919 if (!gimple_seq_empty_p (stmts))
2920 {
2921 pattern_stmt = gimple_seq_first_stmt (stmts);
2922 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2923 }
2924
2925 /* Mask off the bits in the container that we are to write to. */
2926 mask_t = wide_int_to_tree (container_type,
2927 wi::shifted_mask (shift_n, mask_width, true, prec));
2928 tree cleared = vect_recog_temp_ssa_var (container_type);
2929 pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t);
2930 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2931
2932 /* Write MASKED into CLEARED. */
2933 pattern_stmt
2934 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2935 BIT_IOR_EXPR, cleared, masked);
2936
2937 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2938 vect_pattern_detected ("bit_insert pattern", stmt_info->stmt);
2939
2940 return pattern_stmt;
2941 }
2942
2943
2944 /* Recognize cases in which an operation is performed in one type WTYPE
2945 but could be done more efficiently in a narrower type NTYPE. For example,
2946 if we have:
2947
2948 ATYPE a; // narrower than NTYPE
2949 BTYPE b; // narrower than NTYPE
2950 WTYPE aw = (WTYPE) a;
2951 WTYPE bw = (WTYPE) b;
2952 WTYPE res = aw + bw; // only uses of aw and bw
2953
2954 then it would be more efficient to do:
2955
2956 NTYPE an = (NTYPE) a;
2957 NTYPE bn = (NTYPE) b;
2958 NTYPE resn = an + bn;
2959 WTYPE res = (WTYPE) resn;
2960
2961 Other situations include things like:
2962
2963 ATYPE a; // NTYPE or narrower
2964 WTYPE aw = (WTYPE) a;
2965 WTYPE res = aw + b;
2966
2967 when only "(NTYPE) res" is significant. In that case it's more efficient
2968 to truncate "b" and do the operation on NTYPE instead:
2969
2970 NTYPE an = (NTYPE) a;
2971 NTYPE bn = (NTYPE) b; // truncation
2972 NTYPE resn = an + bn;
2973 WTYPE res = (WTYPE) resn;
2974
2975 All users of "res" should then use "resn" instead, making the final
2976 statement dead (not marked as relevant). The final statement is still
2977 needed to maintain the type correctness of the IR.
2978
2979 vect_determine_precisions has already determined the minimum
2980 precision of the operation and the minimum precision required
2981 by users of the result. */
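
/* Editor's sketch (illustration only; names invented): a scalar shape where
   the addition is written in a 32-bit type but, because only 16 bits of the
   result are used, can be done on 16-bit elements instead.  */
static inline unsigned short
over_widening_example (unsigned char a, unsigned char b)
{
  int res = (int) a + (int) b;   /* aw + bw in the wide type ...            */
  return (unsigned short) res;   /* ... but only the low 16 bits are used.  */
}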
2982
2983 static gimple *
2984 vect_recog_over_widening_pattern (vec_info *vinfo,
2985 stmt_vec_info last_stmt_info, tree *type_out)
2986 {
2987 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2988 if (!last_stmt)
2989 return NULL;
2990
2991 /* See whether we have found that this operation can be done on a
2992 narrower type without changing its semantics. */
2993 unsigned int new_precision = last_stmt_info->operation_precision;
2994 if (!new_precision)
2995 return NULL;
2996
2997 tree lhs = gimple_assign_lhs (last_stmt);
2998 tree type = TREE_TYPE (lhs);
2999 tree_code code = gimple_assign_rhs_code (last_stmt);
3000
3001 /* Punt for reductions where we don't handle the type conversions. */
3002 if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
3003 return NULL;
3004
3005 /* Keep the first operand of a COND_EXPR as-is: only the other two
3006 operands are interesting. */
3007 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
3008
3009 /* Check the operands. */
3010 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
3011 auto_vec <vect_unpromoted_value, 3> unprom (nops);
3012 unprom.quick_grow_cleared (nops);
3013 unsigned int min_precision = 0;
3014 bool single_use_p = false;
3015 for (unsigned int i = 0; i < nops; ++i)
3016 {
3017 tree op = gimple_op (last_stmt, first_op + i);
3018 if (TREE_CODE (op) == INTEGER_CST)
3019 unprom[i].set_op (op, vect_constant_def);
3020 else if (TREE_CODE (op) == SSA_NAME)
3021 {
3022 bool op_single_use_p = true;
3023 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
3024 &op_single_use_p))
3025 return NULL;
3026 /* If:
3027
3028 (1) N bits of the result are needed;
3029 (2) all inputs are widened from M<N bits; and
3030 (3) one operand OP is a single-use SSA name
3031
3032 we can shift the M->N widening from OP to the output
3033 without changing the number or type of extensions involved.
3034 This then reduces the number of copies of STMT_INFO.
3035
3036 If instead of (3) more than one operand is a single-use SSA name,
3037 shifting the extension to the output is even more of a win.
3038
3039 If instead:
3040
3041 (1) N bits of the result are needed;
3042 (2) one operand OP2 is widened from M2<N bits;
3043 (3) another operand OP1 is widened from M1<M2 bits; and
3044 (4) both OP1 and OP2 are single-use
3045
3046 the choice is between:
3047
3048 (a) truncating OP2 to M1, doing the operation on M1,
3049 and then widening the result to N
3050
3051 (b) widening OP1 to M2, doing the operation on M2, and then
3052 widening the result to N
3053
3054 Both shift the M2->N widening of the inputs to the output.
3055 (a) additionally shifts the M1->M2 widening to the output;
3056 it requires fewer copies of STMT_INFO but requires an extra
3057 M2->M1 truncation.
3058
3059 Which is better will depend on the complexity and cost of
3060 STMT_INFO, which is hard to predict at this stage. However,
3061 a clear tie-breaker in favor of (b) is the fact that the
3062 truncation in (a) increases the length of the operation chain.
3063
3064 If instead of (4) only one of OP1 or OP2 is single-use,
3065 (b) is still a win over doing the operation in N bits:
3066 it still shifts the M2->N widening on the single-use operand
3067 to the output and reduces the number of STMT_INFO copies.
3068
3069 If neither operand is single-use then operating on fewer than
3070 N bits might lead to more extensions overall. Whether it does
3071 or not depends on global information about the vectorization
3072 region, and whether that's a good trade-off would again
3073 depend on the complexity and cost of the statements involved,
3074 as well as things like register pressure that are not normally
3075 modelled at this stage. We therefore ignore these cases
3076 and just optimize the clear single-use wins above.
3077
3078 Thus we take the maximum precision of the unpromoted operands
3079 and record whether any operand is single-use. */
3080 if (unprom[i].dt == vect_internal_def)
3081 {
3082 min_precision = MAX (min_precision,
3083 TYPE_PRECISION (unprom[i].type));
3084 single_use_p |= op_single_use_p;
3085 }
3086 }
3087 else
3088 return NULL;
3089 }
3090
3091 /* Although the operation could be done in operation_precision, we have
3092 to balance that against introducing extra truncations or extensions.
3093 Calculate the minimum precision that can be handled efficiently.
3094
3095 The loop above determined that the operation could be handled
3096 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
3097 extension from the inputs to the output without introducing more
3098 instructions, and would reduce the number of instructions required
3099 for STMT_INFO itself.
3100
3101 vect_determine_precisions has also determined that the result only
3102 needs min_output_precision bits. Truncating by a factor of N
3103 requires a tree of N - 1 instructions, so if TYPE is N times wider
3104 than min_output_precision, doing the operation in TYPE and truncating
3105 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
3106 In contrast:
3107
3108 - truncating the input to a unary operation and doing the operation
3109 in the new type requires at most N - 1 + 1 = N instructions per
3110 output vector
3111
3112 - doing the same for a binary operation requires at most
3113 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
3114
3115 Both unary and binary operations require fewer instructions than
3116 this if the operands were extended from a suitable truncated form.
3117 Thus there is usually nothing to lose by doing operations in
3118 min_output_precision bits, but there can be something to gain. */
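  /* Editor's note (worked instance of the counts above, for N = 4): doing the
     operation in TYPE and truncating costs 4 + 3 = 7 statements per output
     vector, truncating the inputs of a binary operation costs at most
     3 * 2 + 1 = 7, and of a unary operation at most 3 + 1 = 4, so narrowing
     to min_output_precision never loses and can win.  */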
3119 if (!single_use_p)
3120 min_precision = last_stmt_info->min_output_precision;
3121 else
3122 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
3123
3124 /* Apply the minimum efficient precision we just calculated. */
3125 if (new_precision < min_precision)
3126 new_precision = min_precision;
3127 new_precision = vect_element_precision (new_precision);
3128 if (new_precision >= TYPE_PRECISION (type))
3129 return NULL;
3130
3131 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
3132
3133 *type_out = get_vectype_for_scalar_type (vinfo, type);
3134 if (!*type_out)
3135 return NULL;
3136
3137 /* We've found a viable pattern. Get the new type of the operation. */
3138 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
3139 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
3140
3141 /* If we're truncating an operation, we need to make sure that we
3142 don't introduce new undefined overflow. The codes tested here are
3143 a subset of those accepted by vect_truncatable_operation_p. */
3144 tree op_type = new_type;
3145 if (TYPE_OVERFLOW_UNDEFINED (new_type)
3146 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
3147 op_type = build_nonstandard_integer_type (new_precision, true);
3148
3149 /* We specifically don't check here whether the target supports the
3150 new operation, since it might be something that a later pattern
3151 wants to rewrite anyway. If targets have a minimum element size
3152 for some optabs, we should pattern-match smaller ops to larger ops
3153 where beneficial. */
3154 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3155 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
3156 if (!new_vectype || !op_vectype)
3157 return NULL;
3158
3159 if (dump_enabled_p ())
3160 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
3161 type, new_type);
3162
3163 /* Calculate the rhs operands for an operation on OP_TYPE. */
3164 tree ops[3] = {};
3165 for (unsigned int i = 1; i < first_op; ++i)
3166 ops[i - 1] = gimple_op (last_stmt, i);
3167 /* For right shifts limit the shift operand. */
3168 vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
3169 op_type, &unprom[0], op_vectype);
3170
3171 /* Limit shift operands. */
3172 if (code == RSHIFT_EXPR)
3173 {
3174 wide_int min_value, max_value;
3175 if (TREE_CODE (ops[1]) == INTEGER_CST)
3176 ops[1] = wide_int_to_tree (op_type,
3177 wi::umin (wi::to_wide (ops[1]),
3178 new_precision - 1));
3179 else if (!vect_get_range_info (ops[1], &min_value, &max_value)
3180 || wi::ge_p (max_value, new_precision, TYPE_SIGN (op_type)))
3181 {
3182 /* ??? Note the following is bad for SLP as that only supports
3183 same-argument widened shifts and it un-CSEs same arguments. */
3184 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
3185 gimple *pattern_stmt
3186 = gimple_build_assign (new_var, MIN_EXPR, ops[1],
3187 build_int_cst (op_type, new_precision - 1));
3188 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3189 if (ops[1] == unprom[1].op && unprom[1].dt == vect_external_def)
3190 {
3191 if (edge e = vect_get_external_def_edge (vinfo, ops[1]))
3192 {
3193 basic_block new_bb
3194 = gsi_insert_on_edge_immediate (e, pattern_stmt);
3195 gcc_assert (!new_bb);
3196 }
3197 else
3198 return NULL;
3199 }
3200 else
3201 append_pattern_def_seq (vinfo, last_stmt_info, pattern_stmt,
3202 op_vectype);
3203 ops[1] = new_var;
3204 }
3205 }
3206
3207 /* Use the operation to produce a result of type OP_TYPE. */
3208 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
3209 gimple *pattern_stmt = gimple_build_assign (new_var, code,
3210 ops[0], ops[1], ops[2]);
3211 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3212
3213 if (dump_enabled_p ())
3214 dump_printf_loc (MSG_NOTE, vect_location,
3215 "created pattern stmt: %G", pattern_stmt);
3216
3217 /* Convert back to the original signedness, if OP_TYPE is different
3218 from NEW_TYPE. */
3219 if (op_type != new_type)
3220 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
3221 pattern_stmt, op_vectype);
3222
3223 /* Promote the result to the original type. */
3224 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
3225 pattern_stmt, new_vectype);
3226
3227 return pattern_stmt;
3228 }
3229
3230 /* Recognize the following patterns:
3231
3232 ATYPE a; // narrower than TYPE
3233 BTYPE b; // narrower than TYPE
3234
3235 1) Multiply high with scaling
3236 TYPE res = ((TYPE) a * (TYPE) b) >> c;
3237 Here, c is bitsize (TYPE) / 2 - 1.
3238
3239 2) ... or also with rounding
3240 TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
3241 Here, d is bitsize (TYPE) / 2 - 2.
3242
3243 3) Normal multiply high
3244 TYPE res = ((TYPE) a * (TYPE) b) >> e;
3245 Here, e is bitsize (TYPE) / 2.
3246
3247 where only the bottom half of res is used. */
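
/* Editor's sketch (illustration only; names invented): pattern 3) for 16-bit
   inputs, where the 32-bit product is shifted right by 16 and only the low
   half of the result is used; this maps to IFN_MULH on 16-bit elements.  */
static inline short
mulh_example (short a, short b)
{
  return (short) (((int) a * (int) b) >> 16);
}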
3248
3249 static gimple *
3250 vect_recog_mulhs_pattern (vec_info *vinfo,
3251 stmt_vec_info last_stmt_info, tree *type_out)
3252 {
3253 /* Check for a right shift. */
3254 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3255 if (!last_stmt
3256 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
3257 return NULL;
3258
3259 /* Check that the shift result is wider than the users of the
3260 result need (i.e. that narrowing would be a natural choice). */
3261 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
3262 unsigned int target_precision
3263 = vect_element_precision (last_stmt_info->min_output_precision);
3264 if (!INTEGRAL_TYPE_P (lhs_type)
3265 || target_precision >= TYPE_PRECISION (lhs_type))
3266 return NULL;
3267
3268 /* Look through any change in sign on the outer shift input. */
3269 vect_unpromoted_value unprom_rshift_input;
3270 tree rshift_input = vect_look_through_possible_promotion
3271 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
3272 if (!rshift_input
3273 || TYPE_PRECISION (TREE_TYPE (rshift_input))
3274 != TYPE_PRECISION (lhs_type))
3275 return NULL;
3276
3277 /* Get the definition of the shift input. */
3278 stmt_vec_info rshift_input_stmt_info
3279 = vect_get_internal_def (vinfo, rshift_input);
3280 if (!rshift_input_stmt_info)
3281 return NULL;
3282 gassign *rshift_input_stmt
3283 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
3284 if (!rshift_input_stmt)
3285 return NULL;
3286
3287 stmt_vec_info mulh_stmt_info;
3288 tree scale_term;
3289 bool rounding_p = false;
3290
3291 /* Check for the presence of the rounding term. */
3292 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
3293 {
3294 /* Check that the outer shift was by 1. */
3295 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
3296 return NULL;
3297
3298 /* Check that the second operand of the PLUS_EXPR is 1. */
3299 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
3300 return NULL;
3301
3302 /* Look through any change in sign on the addition input. */
3303 vect_unpromoted_value unprom_plus_input;
3304 tree plus_input = vect_look_through_possible_promotion
3305 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
3306 if (!plus_input
3307 || TYPE_PRECISION (TREE_TYPE (plus_input))
3308 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
3309 return NULL;
3310
3311 /* Get the definition of the multiply-high-scale part. */
3312 stmt_vec_info plus_input_stmt_info
3313 = vect_get_internal_def (vinfo, plus_input);
3314 if (!plus_input_stmt_info)
3315 return NULL;
3316 gassign *plus_input_stmt
3317 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
3318 if (!plus_input_stmt
3319 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
3320 return NULL;
3321
3322 /* Look through any change in sign on the scaling input. */
3323 vect_unpromoted_value unprom_scale_input;
3324 tree scale_input = vect_look_through_possible_promotion
3325 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
3326 if (!scale_input
3327 || TYPE_PRECISION (TREE_TYPE (scale_input))
3328 != TYPE_PRECISION (TREE_TYPE (plus_input)))
3329 return NULL;
3330
3331 /* Get the definition of the multiply-high part. */
3332 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
3333 if (!mulh_stmt_info)
3334 return NULL;
3335
3336 /* Get the scaling term. */
3337 scale_term = gimple_assign_rhs2 (plus_input_stmt);
3338 rounding_p = true;
3339 }
3340 else
3341 {
3342 mulh_stmt_info = rshift_input_stmt_info;
3343 scale_term = gimple_assign_rhs2 (last_stmt);
3344 }
3345
3346 /* Check that the scaling factor is constant. */
3347 if (TREE_CODE (scale_term) != INTEGER_CST)
3348 return NULL;
3349
3350 /* Check whether the scaling input term can be seen as two widened
3351 inputs multiplied together. */
3352 vect_unpromoted_value unprom_mult[2];
3353 tree new_type;
3354 unsigned int nops
3355 = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
3356 false, 2, unprom_mult, &new_type);
3357 if (nops != 2)
3358 return NULL;
3359
3360 /* Adjust output precision. */
3361 if (TYPE_PRECISION (new_type) < target_precision)
3362 new_type = build_nonstandard_integer_type
3363 (target_precision, TYPE_UNSIGNED (new_type));
3364
3365 unsigned mult_precision = TYPE_PRECISION (new_type);
3366 internal_fn ifn;
3367 /* Check that the scaling factor is expected. Instead of
3368 target_precision, we should use the one that we actually
3369      use for the internal function.  */
3370 if (rounding_p)
3371 {
3372 /* Check pattern 2). */
3373 if (wi::to_widest (scale_term) + mult_precision + 2
3374 != TYPE_PRECISION (lhs_type))
3375 return NULL;
3376
3377 ifn = IFN_MULHRS;
3378 }
3379 else
3380 {
3381 /* Check for pattern 1). */
3382 if (wi::to_widest (scale_term) + mult_precision + 1
3383 == TYPE_PRECISION (lhs_type))
3384 ifn = IFN_MULHS;
3385 /* Check for pattern 3). */
3386 else if (wi::to_widest (scale_term) + mult_precision
3387 == TYPE_PRECISION (lhs_type))
3388 ifn = IFN_MULH;
3389 else
3390 return NULL;
3391 }
3392
3393 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
3394
3395 /* Check for target support. */
3396 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3397 if (!new_vectype
3398 || !direct_internal_fn_supported_p
3399 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3400 return NULL;
3401
3402 /* The IR requires a valid vector type for the cast result, even though
3403 it's likely to be discarded. */
3404 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3405 if (!*type_out)
3406 return NULL;
3407
3408   /* Generate the IFN_MULHS / IFN_MULHRS / IFN_MULH call.  */
3409 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
3410 tree new_ops[2];
3411 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
3412 unprom_mult, new_vectype);
3413 gcall *mulhrs_stmt
3414 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
3415 gimple_call_set_lhs (mulhrs_stmt, new_var);
3416 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
3417
3418 if (dump_enabled_p ())
3419 dump_printf_loc (MSG_NOTE, vect_location,
3420 "created pattern stmt: %G", (gimple *) mulhrs_stmt);
3421
3422 return vect_convert_output (vinfo, last_stmt_info, lhs_type,
3423 mulhrs_stmt, new_vectype);
3424 }
3425
3426 /* Recognize the patterns:
3427
3428 ATYPE a; // narrower than TYPE
3429 BTYPE b; // narrower than TYPE
3430 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
3431 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
3432
3433 where only the bottom half of avg is used. Try to transform them into:
3434
3435 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
3436 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
3437
3438 followed by:
3439
3440 TYPE avg = (TYPE) avg';
3441
3442 where NTYPE is no wider than half of TYPE. Since only the bottom half
3443 of avg is used, all or part of the cast of avg' should become redundant.
3444
3445 If there is no target support available, generate code to distribute rshift
3446 over plus and add a carry. */
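/* An illustrative scalar sketch (the types are assumptions for the
   example): with ATYPE = BTYPE = uint8_t, TYPE = int and the result
   stored into a uint8_t, form (2)

     uint8_t avg = ((int) a + (int) b + 1) >> 1;

   becomes, with NTYPE = uint8_t,

     uint8_t avg = .AVG_CEIL (a, b);

   which many targets can map to a single averaging instruction on byte
   elements.  */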
3447
3448 static gimple *
3449 vect_recog_average_pattern (vec_info *vinfo,
3450 stmt_vec_info last_stmt_info, tree *type_out)
3451 {
3452 /* Check for a shift right by one bit. */
3453 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3454 if (!last_stmt
3455 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
3456 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
3457 return NULL;
3458
3459 /* Check that the shift result is wider than the users of the
3460 result need (i.e. that narrowing would be a natural choice). */
3461 tree lhs = gimple_assign_lhs (last_stmt);
3462 tree type = TREE_TYPE (lhs);
3463 unsigned int target_precision
3464 = vect_element_precision (last_stmt_info->min_output_precision);
3465 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
3466 return NULL;
3467
3468 /* Look through any change in sign on the shift input. */
3469 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
3470 vect_unpromoted_value unprom_plus;
3471 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
3472 &unprom_plus);
3473 if (!rshift_rhs
3474 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
3475 return NULL;
3476
3477 /* Get the definition of the shift input. */
3478 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
3479 if (!plus_stmt_info)
3480 return NULL;
3481
3482 /* Check whether the shift input can be seen as a tree of additions on
3483 2 or 3 widened inputs.
3484
3485 Note that the pattern should be a win even if the result of one or
3486 more additions is reused elsewhere: if the pattern matches, we'd be
3487 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
3488 internal_fn ifn = IFN_AVG_FLOOR;
3489 vect_unpromoted_value unprom[3];
3490 tree new_type;
3491 unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
3492 IFN_VEC_WIDEN_PLUS, false, 3,
3493 unprom, &new_type);
3494 if (nops == 0)
3495 return NULL;
3496 if (nops == 3)
3497 {
3498 /* Check that one operand is 1. */
3499 unsigned int i;
3500 for (i = 0; i < 3; ++i)
3501 if (integer_onep (unprom[i].op))
3502 break;
3503 if (i == 3)
3504 return NULL;
3505 /* Throw away the 1 operand and keep the other two. */
3506 if (i < 2)
3507 unprom[i] = unprom[2];
3508 ifn = IFN_AVG_CEIL;
3509 }
3510
3511 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
3512
3513 /* We know that:
3514
3515 (a) the operation can be viewed as:
3516
3517 TYPE widened0 = (TYPE) UNPROM[0];
3518 TYPE widened1 = (TYPE) UNPROM[1];
3519 TYPE tmp1 = widened0 + widened1 {+ 1};
3520 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
3521
3522 (b) the first two statements are equivalent to:
3523
3524 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
3525 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
3526
3527 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
3528 where sensible;
3529
3530 (d) all the operations can be performed correctly at twice the width of
3531 NEW_TYPE, due to the nature of the average operation; and
3532
3533 (e) users of the result of the right shift need only TARGET_PRECISION
3534 bits, where TARGET_PRECISION is no more than half of TYPE's
3535 precision.
3536
3537 Under these circumstances, the only situation in which NEW_TYPE
3538 could be narrower than TARGET_PRECISION is if widened0, widened1
3539 and an addition result are all used more than once. Thus we can
3540 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
3541 as "free", whereas widening the result of the average instruction
3542 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
3543 therefore better not to go narrower than TARGET_PRECISION. */
3544 if (TYPE_PRECISION (new_type) < target_precision)
3545 new_type = build_nonstandard_integer_type (target_precision,
3546 TYPE_UNSIGNED (new_type));
3547
3548 /* Check for target support. */
3549 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3550 if (!new_vectype)
3551 return NULL;
3552
3553 bool fallback_p = false;
3554
3555 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3556 ;
3557 else if (TYPE_UNSIGNED (new_type)
3558 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
3559 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
3560 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
3561 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
3562 fallback_p = true;
3563 else
3564 return NULL;
3565
3566 /* The IR requires a valid vector type for the cast result, even though
3567 it's likely to be discarded. */
3568 *type_out = get_vectype_for_scalar_type (vinfo, type);
3569 if (!*type_out)
3570 return NULL;
3571
3572 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
3573 tree new_ops[2];
3574 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
3575 unprom, new_vectype);
3576
3577 if (fallback_p)
3578 {
3579       /* As a fallback, generate code for the following sequence:
3580
3581 shifted_op0 = new_ops[0] >> 1;
3582 shifted_op1 = new_ops[1] >> 1;
3583 sum_of_shifted = shifted_op0 + shifted_op1;
3584          unmasked_carry = new_ops[0] & new_ops[1] (floor) or new_ops[0] | new_ops[1] (ceil);
3585 carry = unmasked_carry & 1;
3586 new_var = sum_of_shifted + carry;
3587 */
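      /* A quick sanity check of the identity on example values: for the
	 floor case, new_ops[0] = 5 and new_ops[1] = 7 give
	 (5 >> 1) + (7 >> 1) = 5 and carry = (5 & 7) & 1 = 1, so the
	 result is 6 == (5 + 7) >> 1.  For the ceil case, 4 and 7 give
	 2 + 3 = 5 and carry = (4 | 7) & 1 = 1, so the result is
	 6 == (4 + 7 + 1) >> 1.  */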
3588
3589 tree one_cst = build_one_cst (new_type);
3590 gassign *g;
3591
3592 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
3593 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
3594 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3595
3596 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
3597 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
3598 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3599
3600 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
3601 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
3602 shifted_op0, shifted_op1);
3603 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3604
3605 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
3606 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
3607 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
3608 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3609
3610 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
3611 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
3612 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3613
3614 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
3615 return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
3616 }
3617
3618 /* Generate the IFN_AVG* call. */
3619 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
3620 new_ops[1]);
3621 gimple_call_set_lhs (average_stmt, new_var);
3622 gimple_set_location (average_stmt, gimple_location (last_stmt));
3623
3624 if (dump_enabled_p ())
3625 dump_printf_loc (MSG_NOTE, vect_location,
3626 "created pattern stmt: %G", (gimple *) average_stmt);
3627
3628 return vect_convert_output (vinfo, last_stmt_info,
3629 type, average_stmt, new_vectype);
3630 }
3631
3632 /* Recognize cases in which the input to a cast is wider than its
3633 output, and the input is fed by a widening operation. Fold this
3634 by removing the unnecessary intermediate widening. E.g.:
3635
3636 unsigned char a;
3637 unsigned int b = (unsigned int) a;
3638 unsigned short c = (unsigned short) b;
3639
3640 -->
3641
3642 unsigned short c = (unsigned short) a;
3643
3644 Although this is rare in input IR, it is an expected side-effect
3645 of the over-widening pattern above.
3646
3647 This is beneficial also for integer-to-float conversions, if the
3648 widened integer has more bits than the float, and if the unwidened
3649 input doesn't. */
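/* An illustrative sketch of the float case (assuming a 32-bit float and
   a 64-bit unsigned long long):

     unsigned short a;
     unsigned long long b = a;
     float f = (float) b;

   can become "float f = (float) a;", since every unsigned short value is
   exactly representable in float and the 64-bit intermediate would
   otherwise force a wider vector element than the result needs.  */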
3650
3651 static gimple *
3652 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
3653 stmt_vec_info last_stmt_info, tree *type_out)
3654 {
3655 /* Check for a cast, including an integer-to-float conversion. */
3656 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3657 if (!last_stmt)
3658 return NULL;
3659 tree_code code = gimple_assign_rhs_code (last_stmt);
3660 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
3661 return NULL;
3662
3663   /* Make sure that the lhs is a scalar with a natural bitsize.  */
3664 tree lhs = gimple_assign_lhs (last_stmt);
3665 if (!lhs)
3666 return NULL;
3667 tree lhs_type = TREE_TYPE (lhs);
3668 scalar_mode lhs_mode;
3669 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
3670 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
3671 return NULL;
3672
3673 /* Check for a narrowing operation (from a vector point of view). */
3674 tree rhs = gimple_assign_rhs1 (last_stmt);
3675 tree rhs_type = TREE_TYPE (rhs);
3676 if (!INTEGRAL_TYPE_P (rhs_type)
3677 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
3678 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
3679 return NULL;
3680
3681 /* Try to find an unpromoted input. */
3682 vect_unpromoted_value unprom;
3683 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
3684 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
3685 return NULL;
3686
3687 /* If the bits above RHS_TYPE matter, make sure that they're the
3688 same when extending from UNPROM as they are when extending from RHS. */
3689 if (!INTEGRAL_TYPE_P (lhs_type)
3690 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
3691 return NULL;
3692
3693 /* We can get the same result by casting UNPROM directly, to avoid
3694 the unnecessary widening and narrowing. */
3695 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
3696
3697 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3698 if (!*type_out)
3699 return NULL;
3700
3701 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
3702 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
3703 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3704
3705 return pattern_stmt;
3706 }
3707
3708 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
3709 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
3710
3711 static gimple *
3712 vect_recog_widen_shift_pattern (vec_info *vinfo,
3713 stmt_vec_info last_stmt_info, tree *type_out)
3714 {
3715 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
3716 LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
3717 "vect_recog_widen_shift_pattern");
3718 }
3719
3720 /* Detect a rotate pattern that wouldn't be otherwise vectorized:
3721
3722 type a_t, b_t, c_t;
3723
3724 S0 a_t = b_t r<< c_t;
3725
3726 Input/Output:
3727
3728 * STMT_VINFO: The stmt from which the pattern search begins,
3729 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
3730 with a sequence:
3731
3732 S1 d_t = -c_t;
3733 S2 e_t = d_t & (B - 1);
3734 S3 f_t = b_t << c_t;
3735 S4 g_t = b_t >> e_t;
3736 S0 a_t = f_t | g_t;
3737
3738 where B is element bitsize of type.
3739
3740 Output:
3741
3742 * TYPE_OUT: The type of the output of this pattern.
3743
3744 * Return value: A new stmt that will be used to replace the rotate
3745 S0 stmt. */
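/* As a scalar sketch of the S1..S0 sequence (B assumed to be 32 for the
   example), the replacement computes

     a_t = (b_t << c_t) | (b_t >> ((-c_t) & 31));

   where the mask with B - 1 keeps the right-shift amount in range even
   when c_t is 0 modulo B.  */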
3746
3747 static gimple *
3748 vect_recog_rotate_pattern (vec_info *vinfo,
3749 stmt_vec_info stmt_vinfo, tree *type_out)
3750 {
3751 gimple *last_stmt = stmt_vinfo->stmt;
3752 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
3753 gimple *pattern_stmt, *def_stmt;
3754 enum tree_code rhs_code;
3755 enum vect_def_type dt;
3756 optab optab1, optab2;
3757 edge ext_def = NULL;
3758 bool bswap16_p = false;
3759
3760 if (is_gimple_assign (last_stmt))
3761 {
3762 rhs_code = gimple_assign_rhs_code (last_stmt);
3763 switch (rhs_code)
3764 {
3765 case LROTATE_EXPR:
3766 case RROTATE_EXPR:
3767 break;
3768 default:
3769 return NULL;
3770 }
3771
3772 lhs = gimple_assign_lhs (last_stmt);
3773 oprnd0 = gimple_assign_rhs1 (last_stmt);
3774 type = TREE_TYPE (oprnd0);
3775 oprnd1 = gimple_assign_rhs2 (last_stmt);
3776 }
3777 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
3778 {
3779 /* __builtin_bswap16 (x) is another form of x r>> 8.
3780 The vectorizer has bswap support, but only if the argument isn't
3781 promoted. */
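      /* E.g. __builtin_bswap16 (0x1234) == 0x3412, which is 0x1234
	 rotated by 8 bits within a 16-bit value.  */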
3782 lhs = gimple_call_lhs (last_stmt);
3783 oprnd0 = gimple_call_arg (last_stmt, 0);
3784 type = TREE_TYPE (oprnd0);
3785 if (!lhs
3786 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
3787 || TYPE_PRECISION (type) <= 16
3788 || TREE_CODE (oprnd0) != SSA_NAME
3789 || BITS_PER_UNIT != 8)
3790 return NULL;
3791
3792 stmt_vec_info def_stmt_info;
3793 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
3794 return NULL;
3795
3796 if (dt != vect_internal_def)
3797 return NULL;
3798
3799 if (gimple_assign_cast_p (def_stmt))
3800 {
3801 def = gimple_assign_rhs1 (def_stmt);
3802 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
3803 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
3804 oprnd0 = def;
3805 }
3806
3807 type = TREE_TYPE (lhs);
3808 vectype = get_vectype_for_scalar_type (vinfo, type);
3809 if (vectype == NULL_TREE)
3810 return NULL;
3811
3812 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
3813 {
3814 /* The encoding uses one stepped pattern for each byte in the
3815 16-bit word. */
3816 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
3817 for (unsigned i = 0; i < 3; ++i)
3818 for (unsigned j = 0; j < 2; ++j)
3819 elts.quick_push ((i + 1) * 2 - j - 1);
3820
3821 vec_perm_indices indices (elts, 1,
3822 TYPE_VECTOR_SUBPARTS (char_vectype));
3823 machine_mode vmode = TYPE_MODE (char_vectype);
3824 if (can_vec_perm_const_p (vmode, vmode, indices))
3825 {
3826 /* vectorizable_bswap can handle the __builtin_bswap16 if we
3827 undo the argument promotion. */
3828 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3829 {
3830 def = vect_recog_temp_ssa_var (type, NULL);
3831 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3832 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3833 oprnd0 = def;
3834 }
3835
3836 /* Pattern detected. */
3837 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3838
3839 *type_out = vectype;
3840
3841 /* Pattern supported. Create a stmt to be used to replace the
3842 pattern, with the unpromoted argument. */
3843 var = vect_recog_temp_ssa_var (type, NULL);
3844 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
3845 1, oprnd0);
3846 gimple_call_set_lhs (pattern_stmt, var);
3847 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
3848 gimple_call_fntype (last_stmt));
3849 return pattern_stmt;
3850 }
3851 }
3852
3853 oprnd1 = build_int_cst (integer_type_node, 8);
3854 rhs_code = LROTATE_EXPR;
3855 bswap16_p = true;
3856 }
3857 else
3858 return NULL;
3859
3860 if (TREE_CODE (oprnd0) != SSA_NAME
3861 || !INTEGRAL_TYPE_P (type)
3862 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type))
3863 return NULL;
3864
3865 stmt_vec_info def_stmt_info;
3866 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
3867 return NULL;
3868
3869 if (dt != vect_internal_def
3870 && dt != vect_constant_def
3871 && dt != vect_external_def)
3872 return NULL;
3873
3874 vectype = get_vectype_for_scalar_type (vinfo, type);
3875 if (vectype == NULL_TREE)
3876 return NULL;
3877
3878 /* If vector/vector or vector/scalar rotate is supported by the target,
3879 don't do anything here. */
3880 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
3881 if (optab1
3882 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3883 {
3884 use_rotate:
3885 if (bswap16_p)
3886 {
3887 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3888 {
3889 def = vect_recog_temp_ssa_var (type, NULL);
3890 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3891 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3892 oprnd0 = def;
3893 }
3894
3895 /* Pattern detected. */
3896 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3897
3898 *type_out = vectype;
3899
3900 /* Pattern supported. Create a stmt to be used to replace the
3901 pattern. */
3902 var = vect_recog_temp_ssa_var (type, NULL);
3903 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
3904 oprnd1);
3905 return pattern_stmt;
3906 }
3907 return NULL;
3908 }
3909
3910 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
3911 {
3912 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
3913 if (optab2
3914 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3915 goto use_rotate;
3916 }
3917
3918 tree utype = unsigned_type_for (type);
3919 tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
3920 if (!uvectype)
3921 return NULL;
3922
3923 /* If vector/vector or vector/scalar shifts aren't supported by the target,
3924 don't do anything here either. */
3925 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
3926 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
3927 if (!optab1
3928 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3929 || !optab2
3930 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3931 {
3932 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
3933 return NULL;
3934 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
3935 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
3936 if (!optab1
3937 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3938 || !optab2
3939 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3940 return NULL;
3941 }
3942
3943 *type_out = vectype;
3944
3945 if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
3946 {
3947 def = vect_recog_temp_ssa_var (utype, NULL);
3948 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3949 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3950 oprnd0 = def;
3951 }
3952
3953 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
3954 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
3955
3956 def = NULL_TREE;
3957 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
3958 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
3959 def = oprnd1;
3960 else if (def_stmt && gimple_assign_cast_p (def_stmt))
3961 {
3962 tree rhs1 = gimple_assign_rhs1 (def_stmt);
3963 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
3964 && TYPE_PRECISION (TREE_TYPE (rhs1))
3965 == TYPE_PRECISION (type))
3966 def = rhs1;
3967 }
3968
3969 if (def == NULL_TREE)
3970 {
3971 def = vect_recog_temp_ssa_var (utype, NULL);
3972 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
3973 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3974 }
3975 stype = TREE_TYPE (def);
3976
3977 if (TREE_CODE (def) == INTEGER_CST)
3978 {
3979 if (!tree_fits_uhwi_p (def)
3980 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
3981 || integer_zerop (def))
3982 return NULL;
3983 def2 = build_int_cst (stype,
3984 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
3985 }
3986 else
3987 {
3988 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
3989
3990 if (vecstype == NULL_TREE)
3991 return NULL;
3992 def2 = vect_recog_temp_ssa_var (stype, NULL);
3993 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
3994 if (ext_def)
3995 {
3996 basic_block new_bb
3997 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3998 gcc_assert (!new_bb);
3999 }
4000 else
4001 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
4002
4003 def2 = vect_recog_temp_ssa_var (stype, NULL);
4004 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
4005 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
4006 gimple_assign_lhs (def_stmt), mask);
4007 if (ext_def)
4008 {
4009 basic_block new_bb
4010 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
4011 gcc_assert (!new_bb);
4012 }
4013 else
4014 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
4015 }
4016
4017 var1 = vect_recog_temp_ssa_var (utype, NULL);
4018 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
4019 ? LSHIFT_EXPR : RSHIFT_EXPR,
4020 oprnd0, def);
4021 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
4022
4023 var2 = vect_recog_temp_ssa_var (utype, NULL);
4024 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
4025 ? RSHIFT_EXPR : LSHIFT_EXPR,
4026 oprnd0, def2);
4027 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
4028
4029 /* Pattern detected. */
4030 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
4031
4032 /* Pattern supported. Create a stmt to be used to replace the pattern. */
4033 var = vect_recog_temp_ssa_var (utype, NULL);
4034 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
4035
4036 if (!useless_type_conversion_p (type, utype))
4037 {
4038 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, uvectype);
4039 tree result = vect_recog_temp_ssa_var (type, NULL);
4040 pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
4041 }
4042 return pattern_stmt;
4043 }
4044
4045 /* Detect a vector by vector shift pattern that wouldn't be otherwise
4046 vectorized:
4047
4048 type a_t;
4049 TYPE b_T, res_T;
4050
4051 S1 a_t = ;
4052 S2 b_T = ;
4053 S3 res_T = b_T op a_t;
4054
4055    where type 'TYPE' is a type with a different size than 'type',
4056 and op is <<, >> or rotate.
4057
4058 Also detect cases:
4059
4060 type a_t;
4061 TYPE b_T, c_T, res_T;
4062
4063 S0 c_T = ;
4064 S1 a_t = (type) c_T;
4065 S2 b_T = ;
4066 S3 res_T = b_T op a_t;
4067
4068 Input/Output:
4069
4070 * STMT_VINFO: The stmt from which the pattern search begins,
4071 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
4072         with a shift/rotate which has the same type on both operands, in the
4073 second case just b_T op c_T, in the first case with added cast
4074 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
4075
4076 Output:
4077
4078 * TYPE_OUT: The type of the output of this pattern.
4079
4080 * Return value: A new stmt that will be used to replace the shift/rotate
4081 S3 stmt. */
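/* A scalar sketch of the first form (the types are example assumptions):

     int a_t;
     long long b_T, res_T;
     res_T = b_T << a_t;

   is handled by emitting a cast of a_t to the type of b_T into the
   pattern definition sequence and shifting by that casted value, so both
   shift operands end up with the same vector element size.  */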
4082
4083 static gimple *
4084 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
4085 stmt_vec_info stmt_vinfo,
4086 tree *type_out)
4087 {
4088 gimple *last_stmt = stmt_vinfo->stmt;
4089 tree oprnd0, oprnd1, lhs, var;
4090 gimple *pattern_stmt;
4091 enum tree_code rhs_code;
4092
4093 if (!is_gimple_assign (last_stmt))
4094 return NULL;
4095
4096 rhs_code = gimple_assign_rhs_code (last_stmt);
4097 switch (rhs_code)
4098 {
4099 case LSHIFT_EXPR:
4100 case RSHIFT_EXPR:
4101 case LROTATE_EXPR:
4102 case RROTATE_EXPR:
4103 break;
4104 default:
4105 return NULL;
4106 }
4107
4108 lhs = gimple_assign_lhs (last_stmt);
4109 oprnd0 = gimple_assign_rhs1 (last_stmt);
4110 oprnd1 = gimple_assign_rhs2 (last_stmt);
4111 if (TREE_CODE (oprnd0) != SSA_NAME
4112 || TREE_CODE (oprnd1) != SSA_NAME
4113 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
4114 || !INTEGRAL_TYPE_P (TREE_TYPE (oprnd0))
4115 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
4116 || TYPE_PRECISION (TREE_TYPE (lhs))
4117 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
4118 return NULL;
4119
4120 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
4121 if (!def_vinfo)
4122 return NULL;
4123
4124 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
4125 if (*type_out == NULL_TREE)
4126 return NULL;
4127
4128 tree def = NULL_TREE;
4129 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
4130 if (def_stmt && gimple_assign_cast_p (def_stmt))
4131 {
4132 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4133 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
4134 && TYPE_PRECISION (TREE_TYPE (rhs1))
4135 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
4136 {
4137 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
4138 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
4139 def = rhs1;
4140 else
4141 {
4142 tree mask
4143 = build_low_bits_mask (TREE_TYPE (rhs1),
4144 TYPE_PRECISION (TREE_TYPE (oprnd1)));
4145 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4146 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
4147 tree vecstype = get_vectype_for_scalar_type (vinfo,
4148 TREE_TYPE (rhs1));
4149 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
4150 }
4151 }
4152 }
4153
4154 if (def == NULL_TREE)
4155 {
4156 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4157 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
4158 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4159 }
4160
4161 /* Pattern detected. */
4162 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
4163
4164 /* Pattern supported. Create a stmt to be used to replace the pattern. */
4165 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4166 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
4167
4168 return pattern_stmt;
4169 }
4170
4171 /* Return true iff the target has a vector optab implementing the operation
4172 CODE on type VECTYPE. */
4173
4174 static bool
4175 target_has_vecop_for_code (tree_code code, tree vectype)
4176 {
4177 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
4178 return voptab
4179 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
4180 }
4181
4182 /* Verify that the target has optabs of VECTYPE to perform all the steps
4183 needed by the multiplication-by-immediate synthesis algorithm described by
4184 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
4185 present. Return true iff the target supports all the steps. */
4186
4187 static bool
4188 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
4189 tree vectype, bool synth_shift_p)
4190 {
4191 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
4192 return false;
4193
4194 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
4195 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
4196
4197 if (var == negate_variant
4198 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
4199 return false;
4200
4201   /* If we must synthesize shifts with additions, make sure that vector
4202 addition is available. */
4203 if ((var == add_variant || synth_shift_p) && !supports_vplus)
4204 return false;
4205
4206 for (int i = 1; i < alg->ops; i++)
4207 {
4208 switch (alg->op[i])
4209 {
4210 case alg_shift:
4211 break;
4212 case alg_add_t_m2:
4213 case alg_add_t2_m:
4214 case alg_add_factor:
4215 if (!supports_vplus)
4216 return false;
4217 break;
4218 case alg_sub_t_m2:
4219 case alg_sub_t2_m:
4220 case alg_sub_factor:
4221 if (!supports_vminus)
4222 return false;
4223 break;
4224 case alg_unknown:
4225 case alg_m:
4226 case alg_zero:
4227 case alg_impossible:
4228 return false;
4229 default:
4230 gcc_unreachable ();
4231 }
4232 }
4233
4234 return true;
4235 }
4236
4237 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
4238    putting the final result in DEST.  Append all statements but the last to
4239    STMT_INFO's pattern definition sequence.  Return the last statement.  */
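/* For instance (illustrative), a shift by 3 becomes three doublings:

     t1 = op + op;          op << 1
     t2 = t1 + t1;          op << 2
     dest = t2 + t2;        op << 3  */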
4240
4241 static gimple *
4242 synth_lshift_by_additions (vec_info *vinfo,
4243 tree dest, tree op, HOST_WIDE_INT amnt,
4244 stmt_vec_info stmt_info)
4245 {
4246 HOST_WIDE_INT i;
4247 tree itype = TREE_TYPE (op);
4248 tree prev_res = op;
4249 gcc_assert (amnt >= 0);
4250 for (i = 0; i < amnt; i++)
4251 {
4252 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
4253 : dest;
4254 gimple *stmt
4255 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
4256 prev_res = tmp_var;
4257 if (i < amnt - 1)
4258 append_pattern_def_seq (vinfo, stmt_info, stmt);
4259 else
4260 return stmt;
4261 }
4262 gcc_unreachable ();
4263 return NULL;
4264 }
4265
4266 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
4267 CODE to operands OP1 and OP2, creating a new temporary SSA var in
4268 the process if necessary. Append the resulting assignment statements
4269 to the sequence in STMT_VINFO. Return the SSA variable that holds the
4270 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
4271 left shifts using additions. */
4272
4273 static tree
4274 apply_binop_and_append_stmt (vec_info *vinfo,
4275 tree_code code, tree op1, tree op2,
4276 stmt_vec_info stmt_vinfo, bool synth_shift_p)
4277 {
4278 if (integer_zerop (op2)
4279 && (code == LSHIFT_EXPR
4280 || code == PLUS_EXPR))
4281 {
4282 gcc_assert (TREE_CODE (op1) == SSA_NAME);
4283 return op1;
4284 }
4285
4286 gimple *stmt;
4287 tree itype = TREE_TYPE (op1);
4288 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
4289
4290 if (code == LSHIFT_EXPR
4291 && synth_shift_p)
4292 {
4293 stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
4294 TREE_INT_CST_LOW (op2), stmt_vinfo);
4295 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4296 return tmp_var;
4297 }
4298
4299 stmt = gimple_build_assign (tmp_var, code, op1, op2);
4300 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4301 return tmp_var;
4302 }
4303
4304 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
4305 and simple arithmetic operations to be vectorized. Record the statements
4306 produced in STMT_VINFO and return the last statement in the sequence or
4307 NULL if it's not possible to synthesize such a multiplication.
4308 This function mirrors the behavior of expand_mult_const in expmed.cc but
4309 works on tree-ssa form. */
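/* Illustrative examples of the kind of sequences choose_mult_variant can
   select (the exact choice is driven by target costs):

     x * 9   ->  t = x << 3;  res = t + x;
     x * 15  ->  t = x << 4;  res = t - x;
     x * -5  ->  t = x << 2;  t2 = t + x;  res = -t2;  */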
4310
4311 static gimple *
4312 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
4313 stmt_vec_info stmt_vinfo)
4314 {
4315 tree itype = TREE_TYPE (op);
4316 machine_mode mode = TYPE_MODE (itype);
4317 struct algorithm alg;
4318 mult_variant variant;
4319 if (!tree_fits_shwi_p (val))
4320 return NULL;
4321
4322 /* Multiplication synthesis by shifts, adds and subs can introduce
4323 signed overflow where the original operation didn't. Perform the
4324 operations on an unsigned type and cast back to avoid this.
4325 In the future we may want to relax this for synthesis algorithms
4326 that we can prove do not cause unexpected overflow. */
4327 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
4328
4329 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
4330 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
4331 if (!vectype)
4332 return NULL;
4333
4334 /* Targets that don't support vector shifts but support vector additions
4335 can synthesize shifts that way. */
4336 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
4337
4338 HOST_WIDE_INT hwval = tree_to_shwi (val);
4339 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
4340 The vectorizer's benefit analysis will decide whether it's beneficial
4341 to do this. */
4342 bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
4343 ? TYPE_MODE (vectype) : mode,
4344 hwval, &alg, &variant, MAX_COST);
4345 if (!possible)
4346 return NULL;
4347
4348 if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
4349 return NULL;
4350
4351 tree accumulator;
4352
4353 /* Clear out the sequence of statements so we can populate it below. */
4354 gimple *stmt = NULL;
4355
4356 if (cast_to_unsigned_p)
4357 {
4358 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
4359 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
4360 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4361 op = tmp_op;
4362 }
4363
4364 if (alg.op[0] == alg_zero)
4365 accumulator = build_int_cst (multtype, 0);
4366 else
4367 accumulator = op;
4368
4369 bool needs_fixup = (variant == negate_variant)
4370 || (variant == add_variant);
4371
4372 for (int i = 1; i < alg.ops; i++)
4373 {
4374 tree shft_log = build_int_cst (multtype, alg.log[i]);
4375 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4376 tree tmp_var = NULL_TREE;
4377
4378 switch (alg.op[i])
4379 {
4380 case alg_shift:
4381 if (synth_shift_p)
4382 stmt
4383 = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
4384 alg.log[i], stmt_vinfo);
4385 else
4386 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
4387 shft_log);
4388 break;
4389 case alg_add_t_m2:
4390 tmp_var
4391 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
4392 stmt_vinfo, synth_shift_p);
4393 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4394 tmp_var);
4395 break;
4396 case alg_sub_t_m2:
4397 tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
4398 shft_log, stmt_vinfo,
4399 synth_shift_p);
4400 /* In some algorithms the first step involves zeroing the
4401 accumulator. If subtracting from such an accumulator
4402 just emit the negation directly. */
4403 if (integer_zerop (accumulator))
4404 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
4405 else
4406 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
4407 tmp_var);
4408 break;
4409 case alg_add_t2_m:
4410 tmp_var
4411 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4412 shft_log, stmt_vinfo, synth_shift_p);
4413 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
4414 break;
4415 case alg_sub_t2_m:
4416 tmp_var
4417 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4418 shft_log, stmt_vinfo, synth_shift_p);
4419 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
4420 break;
4421 case alg_add_factor:
4422 tmp_var
4423 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4424 shft_log, stmt_vinfo, synth_shift_p);
4425 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4426 tmp_var);
4427 break;
4428 case alg_sub_factor:
4429 tmp_var
4430 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4431 shft_log, stmt_vinfo, synth_shift_p);
4432 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
4433 accumulator);
4434 break;
4435 default:
4436 gcc_unreachable ();
4437 }
4438 /* We don't want to append the last stmt in the sequence to stmt_vinfo
4439 but rather return it directly. */
4440
4441 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
4442 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4443 accumulator = accum_tmp;
4444 }
4445 if (variant == negate_variant)
4446 {
4447 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4448 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
4449 accumulator = accum_tmp;
4450 if (cast_to_unsigned_p)
4451 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4452 }
4453 else if (variant == add_variant)
4454 {
4455 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4456 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
4457 accumulator = accum_tmp;
4458 if (cast_to_unsigned_p)
4459 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4460 }
4461   /* Move back to a signed type if needed.  */
4462 if (cast_to_unsigned_p)
4463 {
4464 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
4465 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
4466 }
4467
4468 return stmt;
4469 }
4470
4471 /* Detect multiplication by a constant and convert it into a sequence of
4472    shifts, additions, subtractions and negations.  We reuse the
4473    choose_mult_variant algorithms from expmed.cc.
4474
4475 Input/Output:
4476
4477 STMT_VINFO: The stmt from which the pattern search begins,
4478 i.e. the mult stmt.
4479
4480 Output:
4481
4482 * TYPE_OUT: The type of the output of this pattern.
4483
4484 * Return value: A new stmt that will be used to replace
4485 the multiplication. */
4486
4487 static gimple *
4488 vect_recog_mult_pattern (vec_info *vinfo,
4489 stmt_vec_info stmt_vinfo, tree *type_out)
4490 {
4491 gimple *last_stmt = stmt_vinfo->stmt;
4492 tree oprnd0, oprnd1, vectype, itype;
4493 gimple *pattern_stmt;
4494
4495 if (!is_gimple_assign (last_stmt))
4496 return NULL;
4497
4498 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
4499 return NULL;
4500
4501 oprnd0 = gimple_assign_rhs1 (last_stmt);
4502 oprnd1 = gimple_assign_rhs2 (last_stmt);
4503 itype = TREE_TYPE (oprnd0);
4504
4505 if (TREE_CODE (oprnd0) != SSA_NAME
4506 || TREE_CODE (oprnd1) != INTEGER_CST
4507 || !INTEGRAL_TYPE_P (itype)
4508 || !type_has_mode_precision_p (itype))
4509 return NULL;
4510
4511 vectype = get_vectype_for_scalar_type (vinfo, itype);
4512 if (vectype == NULL_TREE)
4513 return NULL;
4514
4515 /* If the target can handle vectorized multiplication natively,
4516 don't attempt to optimize this. */
4517 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
4518 if (mul_optab != unknown_optab)
4519 {
4520 machine_mode vec_mode = TYPE_MODE (vectype);
4521 int icode = (int) optab_handler (mul_optab, vec_mode);
4522 if (icode != CODE_FOR_nothing)
4523 return NULL;
4524 }
4525
4526 pattern_stmt = vect_synth_mult_by_constant (vinfo,
4527 oprnd0, oprnd1, stmt_vinfo);
4528 if (!pattern_stmt)
4529 return NULL;
4530
4531 /* Pattern detected. */
4532 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
4533
4534 *type_out = vectype;
4535
4536 return pattern_stmt;
4537 }
4538
4539 /* Detect a signed division by a constant that wouldn't be
4540 otherwise vectorized:
4541
4542 type a_t, b_t;
4543
4544 S1 a_t = b_t / N;
4545
4546 where type 'type' is an integral type and N is a constant.
4547
4548 Similarly handle modulo by a constant:
4549
4550 S4 a_t = b_t % N;
4551
4552 Input/Output:
4553
4554 * STMT_VINFO: The stmt from which the pattern search begins,
4555      i.e. the division stmt.  If N is a power of two constant and
4556      the type is signed, S1 is replaced by:
4557 S3 y_t = b_t < 0 ? N - 1 : 0;
4558 S2 x_t = b_t + y_t;
4559 S1' a_t = x_t >> log2 (N);
4560
4561      If N is a power of two constant and the type is signed,
4562      S4 is replaced by (where *_T temporaries have unsigned type):
4563 S9 y_T = b_t < 0 ? -1U : 0U;
4564 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
4565 S7 z_t = (type) z_T;
4566 S6 w_t = b_t + z_t;
4567 S5 x_t = w_t & (N - 1);
4568 S4' a_t = x_t - z_t;
4569
4570 Output:
4571
4572 * TYPE_OUT: The type of the output of this pattern.
4573
4574 * Return value: A new stmt that will be used to replace the division
4575 S1 or modulo S4 stmt. */
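/* A worked instance of the power-of-two case (signed int, N = 4):

     S3  y_t = b_t < 0 ? 3 : 0;
     S2  x_t = b_t + y_t;
     S1' a_t = x_t >> 2;

   e.g. b_t = -7 gives x_t = -4 and a_t = -1, matching C's truncating
   -7 / 4, whereas a plain arithmetic shift would give -2.  */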
4576
4577 static gimple *
4578 vect_recog_divmod_pattern (vec_info *vinfo,
4579 stmt_vec_info stmt_vinfo, tree *type_out)
4580 {
4581 gimple *last_stmt = stmt_vinfo->stmt;
4582 tree oprnd0, oprnd1, vectype, itype, cond;
4583 gimple *pattern_stmt, *def_stmt;
4584 enum tree_code rhs_code;
4585 optab optab;
4586 tree q, cst;
4587 int dummy_int, prec;
4588
4589 if (!is_gimple_assign (last_stmt))
4590 return NULL;
4591
4592 rhs_code = gimple_assign_rhs_code (last_stmt);
4593 switch (rhs_code)
4594 {
4595 case TRUNC_DIV_EXPR:
4596 case EXACT_DIV_EXPR:
4597 case TRUNC_MOD_EXPR:
4598 break;
4599 default:
4600 return NULL;
4601 }
4602
4603 oprnd0 = gimple_assign_rhs1 (last_stmt);
4604 oprnd1 = gimple_assign_rhs2 (last_stmt);
4605 itype = TREE_TYPE (oprnd0);
4606 if (TREE_CODE (oprnd0) != SSA_NAME
4607 || TREE_CODE (oprnd1) != INTEGER_CST
4608 || TREE_CODE (itype) != INTEGER_TYPE
4609 || !type_has_mode_precision_p (itype))
4610 return NULL;
4611
4612 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
4613 vectype = get_vectype_for_scalar_type (vinfo, itype);
4614 if (vectype == NULL_TREE)
4615 return NULL;
4616
4617 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
4618 {
4619 /* If the target can handle vectorized division or modulo natively,
4620 don't attempt to optimize this, since native division is likely
4621 to give smaller code. */
4622 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
4623 if (optab != unknown_optab)
4624 {
4625 machine_mode vec_mode = TYPE_MODE (vectype);
4626 int icode = (int) optab_handler (optab, vec_mode);
4627 if (icode != CODE_FOR_nothing)
4628 return NULL;
4629 }
4630 }
4631
4632 prec = TYPE_PRECISION (itype);
4633 if (integer_pow2p (oprnd1))
4634 {
4635 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
4636 return NULL;
4637
4638 /* Pattern detected. */
4639 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
4640
4641 *type_out = vectype;
4642
4643 /* Check if the target supports this internal function. */
4644 internal_fn ifn = IFN_DIV_POW2;
4645 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
4646 {
4647 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
4648
4649 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
4650 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
4651 gimple_call_set_lhs (div_stmt, var_div);
4652
4653 if (rhs_code == TRUNC_MOD_EXPR)
4654 {
4655 append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
4656 def_stmt
4657 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4658 LSHIFT_EXPR, var_div, shift);
4659 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4660 pattern_stmt
4661 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4662 MINUS_EXPR, oprnd0,
4663 gimple_assign_lhs (def_stmt));
4664 }
4665 else
4666 pattern_stmt = div_stmt;
4667 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
4668
4669 return pattern_stmt;
4670 }
4671
4672 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
4673 build_int_cst (itype, 0));
4674 if (rhs_code == TRUNC_DIV_EXPR
4675 || rhs_code == EXACT_DIV_EXPR)
4676 {
4677 tree var = vect_recog_temp_ssa_var (itype, NULL);
4678 tree shift;
4679 def_stmt
4680 = gimple_build_assign (var, COND_EXPR, cond,
4681 fold_build2 (MINUS_EXPR, itype, oprnd1,
4682 build_int_cst (itype, 1)),
4683 build_int_cst (itype, 0));
4684 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4685 var = vect_recog_temp_ssa_var (itype, NULL);
4686 def_stmt
4687 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
4688 gimple_assign_lhs (def_stmt));
4689 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4690
4691 shift = build_int_cst (itype, tree_log2 (oprnd1));
4692 pattern_stmt
4693 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4694 RSHIFT_EXPR, var, shift);
4695 }
4696 else
4697 {
4698 tree signmask;
4699 if (compare_tree_int (oprnd1, 2) == 0)
4700 {
4701 signmask = vect_recog_temp_ssa_var (itype, NULL);
4702 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
4703 build_int_cst (itype, 1),
4704 build_int_cst (itype, 0));
4705 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4706 }
4707 else
4708 {
4709 tree utype
4710 = build_nonstandard_integer_type (prec, 1);
4711 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
4712 tree shift
4713 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
4714 - tree_log2 (oprnd1));
4715 tree var = vect_recog_temp_ssa_var (utype, NULL);
4716
4717 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
4718 build_int_cst (utype, -1),
4719 build_int_cst (utype, 0));
4720 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
4721 var = vect_recog_temp_ssa_var (utype, NULL);
4722 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
4723 gimple_assign_lhs (def_stmt),
4724 shift);
4725 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
4726 signmask = vect_recog_temp_ssa_var (itype, NULL);
4727 def_stmt
4728 = gimple_build_assign (signmask, NOP_EXPR, var);
4729 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4730 }
4731 def_stmt
4732 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4733 PLUS_EXPR, oprnd0, signmask);
4734 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4735 def_stmt
4736 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4737 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
4738 fold_build2 (MINUS_EXPR, itype, oprnd1,
4739 build_int_cst (itype, 1)));
4740 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4741
4742 pattern_stmt
4743 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4744 MINUS_EXPR, gimple_assign_lhs (def_stmt),
4745 signmask);
4746 }
4747
4748 return pattern_stmt;
4749 }
4750
4751 if ((cst = uniform_integer_cst_p (oprnd1))
4752 && TYPE_UNSIGNED (itype)
4753 && rhs_code == TRUNC_DIV_EXPR
4754 && vectype
4755 && targetm.vectorize.preferred_div_as_shifts_over_mult (vectype))
4756 {
4757 /* We can use the relationship:
4758
4759 x // N == ((x+N+2) // (N+1) + x) // (N+1) for 0 <= x < N(N+3)
4760
4761 to optimize cases where N+1 is a power of 2, and where // (N+1)
4762 is therefore a shift right. When operating in modes that are
4763 multiples of a byte in size, there are two cases:
4764
4765 (1) N(N+3) is not representable, in which case the question
4766 becomes whether the replacement expression overflows.
4767 It is enough to test that x+N+2 does not overflow,
4768 i.e. that x < MAX-(N+1).
4769
4770 (2) N(N+3) is representable, in which case it is the (only)
4771 bound that we need to check.
4772
4773 ??? For now we just handle the case where // (N+1) is a shift
4774 right by half the precision, since some architectures can
4775 optimize the associated addition and shift combinations
4776 into single instructions. */
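      /* A numeric check of the relationship above (illustrative values):
	 with a 16-bit precision, N = 255 and x = 1000,
	 ((1000 + 257) / 256 + 1000) / 256 = (4 + 1000) / 256 = 3, which
	 equals 1000 / 255; x is also well below N * (N + 3) = 65790.  */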
4777
4778 auto wcst = wi::to_wide (cst);
4779 int pow = wi::exact_log2 (wcst + 1);
4780 if (pow == prec / 2)
4781 {
4782 gimple *stmt = SSA_NAME_DEF_STMT (oprnd0);
4783
4784 gimple_ranger ranger;
4785 int_range_max r;
4786
4787 /* Check that no overflow will occur. If we don't have range
4788 information we can't perform the optimization. */
4789
4790 if (ranger.range_of_expr (r, oprnd0, stmt) && !r.undefined_p ())
4791 {
4792 wide_int max = r.upper_bound ();
4793 wide_int one = wi::shwi (1, prec);
4794 wide_int adder = wi::add (one, wi::lshift (one, pow));
4795 wi::overflow_type ovf;
4796 wi::add (max, adder, UNSIGNED, &ovf);
4797 if (ovf == wi::OVF_NONE)
4798 {
4799 *type_out = vectype;
4800 tree tadder = wide_int_to_tree (itype, adder);
4801 tree rshift = wide_int_to_tree (itype, pow);
4802
4803 tree new_lhs1 = vect_recog_temp_ssa_var (itype, NULL);
4804 gassign *patt1
4805 = gimple_build_assign (new_lhs1, PLUS_EXPR, oprnd0, tadder);
4806 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
4807
4808 tree new_lhs2 = vect_recog_temp_ssa_var (itype, NULL);
4809 patt1 = gimple_build_assign (new_lhs2, RSHIFT_EXPR, new_lhs1,
4810 rshift);
4811 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
4812
4813 tree new_lhs3 = vect_recog_temp_ssa_var (itype, NULL);
4814 patt1 = gimple_build_assign (new_lhs3, PLUS_EXPR, new_lhs2,
4815 oprnd0);
4816 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
4817
4818 tree new_lhs4 = vect_recog_temp_ssa_var (itype, NULL);
4819 pattern_stmt = gimple_build_assign (new_lhs4, RSHIFT_EXPR,
4820 new_lhs3, rshift);
4821
4822 return pattern_stmt;
4823 }
4824 }
4825 }
4826 }
4827
4828 if (prec > HOST_BITS_PER_WIDE_INT
4829 || integer_zerop (oprnd1))
4830 return NULL;
4831
4832 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
4833 return NULL;
4834
4835 if (TYPE_UNSIGNED (itype))
4836 {
4837 unsigned HOST_WIDE_INT mh, ml;
4838 int pre_shift, post_shift;
4839 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
4840 & GET_MODE_MASK (itype_mode));
4841 tree t1, t2, t3, t4;
4842
4843 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
4844 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
4845 return NULL;
4846
4847 /* Find a suitable multiplier and right shift count
4848 instead of multiplying with D. */
4849 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
4850
4851       /* If the suggested multiplier does not fit in PREC bits, we can do
4852          better for even divisors, using an initial right shift.  */
4853 if (mh != 0 && (d & 1) == 0)
4854 {
4855 pre_shift = ctz_or_zero (d);
4856 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
4857 &ml, &post_shift, &dummy_int);
4858 gcc_assert (!mh);
4859 }
4860 else
4861 pre_shift = 0;
4862
4863 if (mh != 0)
4864 {
4865 if (post_shift - 1 >= prec)
4866 return NULL;
4867
4868 /* t1 = oprnd0 h* ml;
4869 t2 = oprnd0 - t1;
4870 t3 = t2 >> 1;
4871 t4 = t1 + t3;
4872 q = t4 >> (post_shift - 1); */
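	  /* E.g. for 32-bit unsigned division by 7, choose_multiplier is
	     expected to yield ml = 0x24924925 with post_shift = 3
	     (illustrative numbers); for oprnd0 = 100 that gives t1 = 14,
	     t2 = 86, t3 = 43, t4 = 57 and q = 57 >> 2 = 14 = 100 / 7.  */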
4873 t1 = vect_recog_temp_ssa_var (itype, NULL);
4874 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
4875 build_int_cst (itype, ml));
4876 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4877
4878 t2 = vect_recog_temp_ssa_var (itype, NULL);
4879 def_stmt
4880 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
4881 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4882
4883 t3 = vect_recog_temp_ssa_var (itype, NULL);
4884 def_stmt
4885 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
4886 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4887
4888 t4 = vect_recog_temp_ssa_var (itype, NULL);
4889 def_stmt
4890 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
4891
4892 if (post_shift != 1)
4893 {
4894 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4895
4896 q = vect_recog_temp_ssa_var (itype, NULL);
4897 pattern_stmt
4898 = gimple_build_assign (q, RSHIFT_EXPR, t4,
4899 build_int_cst (itype, post_shift - 1));
4900 }
4901 else
4902 {
4903 q = t4;
4904 pattern_stmt = def_stmt;
4905 }
4906 }
4907 else
4908 {
4909 if (pre_shift >= prec || post_shift >= prec)
4910 return NULL;
4911
4912 /* t1 = oprnd0 >> pre_shift;
4913 t2 = t1 h* ml;
4914 q = t2 >> post_shift; */
4915 if (pre_shift)
4916 {
4917 t1 = vect_recog_temp_ssa_var (itype, NULL);
4918 def_stmt
4919 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
4920 build_int_cst (NULL, pre_shift));
4921 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4922 }
4923 else
4924 t1 = oprnd0;
4925
4926 t2 = vect_recog_temp_ssa_var (itype, NULL);
4927 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
4928 build_int_cst (itype, ml));
4929
4930 if (post_shift)
4931 {
4932 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4933
4934 q = vect_recog_temp_ssa_var (itype, NULL);
4935 def_stmt
4936 = gimple_build_assign (q, RSHIFT_EXPR, t2,
4937 build_int_cst (itype, post_shift));
4938 }
4939 else
4940 q = t2;
4941
4942 pattern_stmt = def_stmt;
4943 }
4944 }
4945 else
4946 {
4947 unsigned HOST_WIDE_INT ml;
4948 int post_shift;
4949 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
4950 unsigned HOST_WIDE_INT abs_d;
4951 bool add = false;
4952 tree t1, t2, t3, t4;
4953
4954 /* Give up for -1. */
4955 if (d == -1)
4956 return NULL;
4957
4958 /* Since d might be INT_MIN, we have to cast to
4959 unsigned HOST_WIDE_INT before negating to avoid
4960 undefined signed overflow. */
4961 abs_d = (d >= 0
4962 ? (unsigned HOST_WIDE_INT) d
4963 : - (unsigned HOST_WIDE_INT) d);
4964
4965 /* n rem d = n rem -d */
4966 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
4967 {
4968 d = abs_d;
4969 oprnd1 = build_int_cst (itype, abs_d);
4970 }
4971 if (HOST_BITS_PER_WIDE_INT >= prec
4972 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
4973 /* This case is not handled correctly below. */
4974 return NULL;
4975
4976 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
4977 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
4978 {
4979 add = true;
4980 ml |= HOST_WIDE_INT_M1U << (prec - 1);
4981 }
4982 if (post_shift >= prec)
4983 return NULL;
4984
4985 /* t1 = oprnd0 h* ml; */
4986 t1 = vect_recog_temp_ssa_var (itype, NULL);
4987 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
4988 build_int_cst (itype, ml));
4989
4990 if (add)
4991 {
4992 /* t2 = t1 + oprnd0; */
4993 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4994 t2 = vect_recog_temp_ssa_var (itype, NULL);
4995 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
4996 }
4997 else
4998 t2 = t1;
4999
5000 if (post_shift)
5001 {
5002 /* t3 = t2 >> post_shift; */
5003 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5004 t3 = vect_recog_temp_ssa_var (itype, NULL);
5005 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
5006 build_int_cst (itype, post_shift));
5007 }
5008 else
5009 t3 = t2;
5010
5011 int msb = 1;
5012 value_range r;
5013 get_range_query (cfun)->range_of_expr (r, oprnd0);
5014 if (!r.varying_p () && !r.undefined_p ())
5015 {
5016 if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
5017 msb = 0;
5018 else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
5019 msb = -1;
5020 }
5021
5022 if (msb == 0 && d >= 0)
5023 {
5024 /* q = t3; */
5025 q = t3;
5026 pattern_stmt = def_stmt;
5027 }
5028 else
5029 {
5030 /* t4 = oprnd0 >> (prec - 1);
5031 or if we know from VRP that oprnd0 >= 0
5032 t4 = 0;
5033 or if we know from VRP that oprnd0 < 0
5034 t4 = -1; */
5035 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5036 t4 = vect_recog_temp_ssa_var (itype, NULL);
5037 if (msb != 1)
5038 def_stmt = gimple_build_assign (t4, INTEGER_CST,
5039 build_int_cst (itype, msb));
5040 else
5041 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
5042 build_int_cst (itype, prec - 1));
5043 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5044
5045 /* q = t3 - t4; or q = t4 - t3; */
5046 q = vect_recog_temp_ssa_var (itype, NULL);
5047 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
5048 d < 0 ? t3 : t4);
5049 }
5050 }
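/* Illustrative example (constants shown only for exposition): for a 32-bit
signed division x / 3, choose_multiplier yields the well-known multiplier
ml = 0x55555556 with post_shift = 0 and add = false, so the sequence above
reduces to
t1 = x h* 0x55555556;  t3 = t1;  t4 = x >> 31;  q = t3 - t4;
e.g. x = 7 gives t1 = 2, t4 = 0, q = 2, while x = -7 gives t1 = -3,
t4 = -1, q = -2, matching C's truncating division. */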
5051
5052 if (rhs_code == TRUNC_MOD_EXPR)
5053 {
5054 tree r, t1;
5055
5056 /* We divided. Now finish by:
5057 t1 = q * oprnd1;
5058 r = oprnd0 - t1; */
5059 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
5060
5061 t1 = vect_recog_temp_ssa_var (itype, NULL);
5062 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
5063 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5064
5065 r = vect_recog_temp_ssa_var (itype, NULL);
5066 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
5067 }
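/* Illustrative example: for x % 3 with x = 7 the division above yields
q = 2, so t1 = 6 and r = 1; for x = -7 it yields q = -2, so t1 = -6 and
r = -1, matching C's truncating remainder. */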
5068
5069 /* Pattern detected. */
5070 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
5071
5072 *type_out = vectype;
5073 return pattern_stmt;
5074 }
5075
5076 /* Function vect_recog_mixed_size_cond_pattern
5077
5078 Try to find the following pattern:
5079
5080 type x_t, y_t;
5081 TYPE a_T, b_T, c_T;
5082 loop:
5083 S1 a_T = x_t CMP y_t ? b_T : c_T;
5084
5085 where type 'TYPE' is an integral type which has different size
5086 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
5087 than 'type', the constants need to fit into an integer type
5088 with the same width as 'type') or results of conversion from 'type'.
5089
5090 Input:
5091
5092 * STMT_VINFO: The stmt from which the pattern search begins.
5093
5094 Output:
5095
5096 * TYPE_OUT: The type of the output of this pattern.
5097
5098 * Return value: A new stmt that will be used to replace the pattern.
5099 Additionally a def_stmt is added.
5100
5101 a_it = x_t CMP y_t ? b_it : c_it;
5102 a_T = (TYPE) a_it; */
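/* Illustrative example (hypothetical arrays, added for exposition): with

short x[N];  int y[N];
y[i] = x[i] < 16 ? 3 : 7;

the COND_EXPR produces an int from a comparison of shorts. On targets where
such a mixed-size vector condition is not supported directly, the pattern
instead creates

tmp_s = x[i] < 16 ? (short) 3 : (short) 7;
y[i] = (int) tmp_s;

so the compare and select are done in the narrower type and only the final
value is widened. */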
5103
5104 static gimple *
5105 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
5106 stmt_vec_info stmt_vinfo, tree *type_out)
5107 {
5108 gimple *last_stmt = stmt_vinfo->stmt;
5109 tree cond_expr, then_clause, else_clause;
5110 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
5111 gimple *pattern_stmt, *def_stmt;
5112 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
5113 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
5114 bool promotion;
5115 tree comp_scalar_type;
5116
5117 if (!is_gimple_assign (last_stmt)
5118 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
5119 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
5120 return NULL;
5121
5122 cond_expr = gimple_assign_rhs1 (last_stmt);
5123 then_clause = gimple_assign_rhs2 (last_stmt);
5124 else_clause = gimple_assign_rhs3 (last_stmt);
5125
5126 if (!COMPARISON_CLASS_P (cond_expr))
5127 return NULL;
5128
5129 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
5130 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
5131 if (comp_vectype == NULL_TREE)
5132 return NULL;
5133
5134 type = TREE_TYPE (gimple_assign_lhs (last_stmt));
5135 if (types_compatible_p (type, comp_scalar_type)
5136 || ((TREE_CODE (then_clause) != INTEGER_CST
5137 || TREE_CODE (else_clause) != INTEGER_CST)
5138 && !INTEGRAL_TYPE_P (comp_scalar_type))
5139 || !INTEGRAL_TYPE_P (type))
5140 return NULL;
5141
5142 if ((TREE_CODE (then_clause) != INTEGER_CST
5143 && !type_conversion_p (vinfo, then_clause, false,
5144 &orig_type0, &def_stmt0, &promotion))
5145 || (TREE_CODE (else_clause) != INTEGER_CST
5146 && !type_conversion_p (vinfo, else_clause, false,
5147 &orig_type1, &def_stmt1, &promotion)))
5148 return NULL;
5149
5150 if (orig_type0 && orig_type1
5151 && !types_compatible_p (orig_type0, orig_type1))
5152 return NULL;
5153
5154 if (orig_type0)
5155 {
5156 if (!types_compatible_p (orig_type0, comp_scalar_type))
5157 return NULL;
5158 then_clause = gimple_assign_rhs1 (def_stmt0);
5159 itype = orig_type0;
5160 }
5161
5162 if (orig_type1)
5163 {
5164 if (!types_compatible_p (orig_type1, comp_scalar_type))
5165 return NULL;
5166 else_clause = gimple_assign_rhs1 (def_stmt1);
5167 itype = orig_type1;
5168 }
5169
5170
5171 HOST_WIDE_INT cmp_mode_size
5172 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
5173
5174 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
5175 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
5176 return NULL;
5177
5178 vectype = get_vectype_for_scalar_type (vinfo, type);
5179 if (vectype == NULL_TREE)
5180 return NULL;
5181
5182 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
5183 return NULL;
5184
5185 if (itype == NULL_TREE)
5186 itype = build_nonstandard_integer_type (cmp_mode_size,
5187 TYPE_UNSIGNED (type));
5188
5189 if (itype == NULL_TREE
5190 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
5191 return NULL;
5192
5193 vecitype = get_vectype_for_scalar_type (vinfo, itype);
5194 if (vecitype == NULL_TREE)
5195 return NULL;
5196
5197 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
5198 return NULL;
5199
5200 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
5201 {
5202 if ((TREE_CODE (then_clause) == INTEGER_CST
5203 && !int_fits_type_p (then_clause, itype))
5204 || (TREE_CODE (else_clause) == INTEGER_CST
5205 && !int_fits_type_p (else_clause, itype)))
5206 return NULL;
5207 }
5208
5209 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5210 COND_EXPR, unshare_expr (cond_expr),
5211 fold_convert (itype, then_clause),
5212 fold_convert (itype, else_clause));
5213 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
5214 NOP_EXPR, gimple_assign_lhs (def_stmt));
5215
5216 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
5217 *type_out = vectype;
5218
5219 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
5220
5221 return pattern_stmt;
5222 }
5223
5224
5225 /* Helper function of vect_recog_bool_pattern. Called recursively, return
5226 true if bool VAR can and should be optimized that way. Assume it shouldn't
5227 in case it's a result of a comparison which can be directly vectorized into
5228 a vector comparison. Fills in STMTS with all stmts visited during the
5229 walk. */
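/* For instance (illustrative names): given

b1 = x < y;  b2 = p != q;  b = b1 & b2;

a call on B walks the defs of B, B1 and B2, adds them to STMTS and returns
true, provided neither comparison can be vectorized directly as a vector
mask comparison; adjust_bool_stmts can then rewrite the whole chain on
integer types. */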
5230
5231 static bool
5232 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
5233 {
5234 tree rhs1;
5235 enum tree_code rhs_code;
5236
5237 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
5238 if (!def_stmt_info)
5239 return false;
5240
5241 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
5242 if (!def_stmt)
5243 return false;
5244
5245 if (stmts.contains (def_stmt))
5246 return true;
5247
5248 rhs1 = gimple_assign_rhs1 (def_stmt);
5249 rhs_code = gimple_assign_rhs_code (def_stmt);
5250 switch (rhs_code)
5251 {
5252 case SSA_NAME:
5253 if (! check_bool_pattern (rhs1, vinfo, stmts))
5254 return false;
5255 break;
5256
5257 CASE_CONVERT:
5258 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
5259 return false;
5260 if (! check_bool_pattern (rhs1, vinfo, stmts))
5261 return false;
5262 break;
5263
5264 case BIT_NOT_EXPR:
5265 if (! check_bool_pattern (rhs1, vinfo, stmts))
5266 return false;
5267 break;
5268
5269 case BIT_AND_EXPR:
5270 case BIT_IOR_EXPR:
5271 case BIT_XOR_EXPR:
5272 if (! check_bool_pattern (rhs1, vinfo, stmts)
5273 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
5274 return false;
5275 break;
5276
5277 default:
5278 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5279 {
5280 tree vecitype, comp_vectype;
5281
5282 /* If the comparison can throw, then is_gimple_condexpr will be
5283 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
5284 if (stmt_could_throw_p (cfun, def_stmt))
5285 return false;
5286
5287 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
5288 if (comp_vectype == NULL_TREE)
5289 return false;
5290
5291 tree mask_type = get_mask_type_for_scalar_type (vinfo,
5292 TREE_TYPE (rhs1));
5293 if (mask_type
5294 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
5295 return false;
5296
5297 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
5298 {
5299 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
5300 tree itype
5301 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
5302 vecitype = get_vectype_for_scalar_type (vinfo, itype);
5303 if (vecitype == NULL_TREE)
5304 return false;
5305 }
5306 else
5307 vecitype = comp_vectype;
5308 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
5309 return false;
5310 }
5311 else
5312 return false;
5313 break;
5314 }
5315
5316 bool res = stmts.add (def_stmt);
5317 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
5318 gcc_assert (!res);
5319
5320 return true;
5321 }
5322
5323
5324 /* Helper function of adjust_bool_pattern. Cast VAR to TYPE via a new
5325 pattern stmt that is appended to STMT_INFO's pattern sequence, and
5326 return the result of the cast. */
5327
5328 static tree
5329 adjust_bool_pattern_cast (vec_info *vinfo,
5330 tree type, tree var, stmt_vec_info stmt_info)
5331 {
5332 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
5333 NOP_EXPR, var);
5334 append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
5335 get_vectype_for_scalar_type (vinfo, type));
5336 return gimple_assign_lhs (cast_stmt);
5337 }
5338
5339 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
5340 VAR is an SSA_NAME that should be transformed from bool to a wider integer
5341 type, OUT_TYPE is the desired final integer type of the whole pattern.
5342 STMT_INFO is the info of the pattern root, with which the pattern stmts
5343 should be associated. DEFS is a map of pattern defs. */
5344
5345 static void
5346 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
5347 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
5348 {
5349 gimple *stmt = SSA_NAME_DEF_STMT (var);
5350 enum tree_code rhs_code, def_rhs_code;
5351 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
5352 location_t loc;
5353 gimple *pattern_stmt, *def_stmt;
5354 tree trueval = NULL_TREE;
5355
5356 rhs1 = gimple_assign_rhs1 (stmt);
5357 rhs2 = gimple_assign_rhs2 (stmt);
5358 rhs_code = gimple_assign_rhs_code (stmt);
5359 loc = gimple_location (stmt);
5360 switch (rhs_code)
5361 {
5362 case SSA_NAME:
5363 CASE_CONVERT:
5364 irhs1 = *defs.get (rhs1);
5365 itype = TREE_TYPE (irhs1);
5366 pattern_stmt
5367 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5368 SSA_NAME, irhs1);
5369 break;
5370
5371 case BIT_NOT_EXPR:
5372 irhs1 = *defs.get (rhs1);
5373 itype = TREE_TYPE (irhs1);
5374 pattern_stmt
5375 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5376 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
5377 break;
5378
5379 case BIT_AND_EXPR:
5380 /* Try to optimize x = y & (a < b ? 1 : 0); into
5381 x = (a < b ? y : 0);
5382
5383 E.g. for:
5384 bool a_b, b_b, c_b;
5385 TYPE d_T;
5386
5387 S1 a_b = x1 CMP1 y1;
5388 S2 b_b = x2 CMP2 y2;
5389 S3 c_b = a_b & b_b;
5390 S4 d_T = (TYPE) c_b;
5391
5392 we would normally emit:
5393
5394 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5395 S2' b_T = x2 CMP2 y2 ? 1 : 0;
5396 S3' c_T = a_T & b_T;
5397 S4' d_T = c_T;
5398
5399 but we can save one stmt by using the
5400 result of one of the COND_EXPRs in the other COND_EXPR and leave
5401 BIT_AND_EXPR stmt out:
5402
5403 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5404 S3' c_T = x2 CMP2 y2 ? a_T : 0;
5405 S4' d_T = c_T;
5406
5407 At least when VEC_COND_EXPR is implemented using masks,
5408 cond ? 1 : 0 is as expensive as cond ? var : 0: in both cases the
5409 comparison mask is computed and then ANDed, in one case with an
5410 all-ones vector, in the other case with a vector register.
5411 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
5412 often more expensive. */
5413 def_stmt = SSA_NAME_DEF_STMT (rhs2);
5414 def_rhs_code = gimple_assign_rhs_code (def_stmt);
5415 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
5416 {
5417 irhs1 = *defs.get (rhs1);
5418 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
5419 if (TYPE_PRECISION (TREE_TYPE (irhs1))
5420 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
5421 {
5422 rhs_code = def_rhs_code;
5423 rhs1 = def_rhs1;
5424 rhs2 = gimple_assign_rhs2 (def_stmt);
5425 trueval = irhs1;
5426 goto do_compare;
5427 }
5428 else
5429 irhs2 = *defs.get (rhs2);
5430 goto and_ior_xor;
5431 }
5432 def_stmt = SSA_NAME_DEF_STMT (rhs1);
5433 def_rhs_code = gimple_assign_rhs_code (def_stmt);
5434 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
5435 {
5436 irhs2 = *defs.get (rhs2);
5437 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
5438 if (TYPE_PRECISION (TREE_TYPE (irhs2))
5439 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
5440 {
5441 rhs_code = def_rhs_code;
5442 rhs1 = def_rhs1;
5443 rhs2 = gimple_assign_rhs2 (def_stmt);
5444 trueval = irhs2;
5445 goto do_compare;
5446 }
5447 else
5448 irhs1 = *defs.get (rhs1);
5449 goto and_ior_xor;
5450 }
5451 /* FALLTHRU */
5452 case BIT_IOR_EXPR:
5453 case BIT_XOR_EXPR:
5454 irhs1 = *defs.get (rhs1);
5455 irhs2 = *defs.get (rhs2);
5456 and_ior_xor:
5457 if (TYPE_PRECISION (TREE_TYPE (irhs1))
5458 != TYPE_PRECISION (TREE_TYPE (irhs2)))
5459 {
5460 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
5461 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
5462 int out_prec = TYPE_PRECISION (out_type);
5463 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
5464 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
5465 stmt_info);
5466 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
5467 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
5468 stmt_info);
5469 else
5470 {
5471 irhs1 = adjust_bool_pattern_cast (vinfo,
5472 out_type, irhs1, stmt_info);
5473 irhs2 = adjust_bool_pattern_cast (vinfo,
5474 out_type, irhs2, stmt_info);
5475 }
5476 }
5477 itype = TREE_TYPE (irhs1);
5478 pattern_stmt
5479 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5480 rhs_code, irhs1, irhs2);
5481 break;
5482
5483 default:
5484 do_compare:
5485 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
5486 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
5487 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
5488 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
5489 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
5490 {
5491 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
5492 itype
5493 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
5494 }
5495 else
5496 itype = TREE_TYPE (rhs1);
5497 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
5498 if (trueval == NULL_TREE)
5499 trueval = build_int_cst (itype, 1);
5500 else
5501 gcc_checking_assert (useless_type_conversion_p (itype,
5502 TREE_TYPE (trueval)));
5503 pattern_stmt
5504 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5505 COND_EXPR, cond_expr, trueval,
5506 build_int_cst (itype, 0));
5507 break;
5508 }
5509
5510 gimple_set_location (pattern_stmt, loc);
5511 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
5512 get_vectype_for_scalar_type (vinfo, itype));
5513 defs.put (var, gimple_assign_lhs (pattern_stmt));
5514 }
5515
5516 /* Comparison function to qsort a vector of gimple stmts by UID. */
5517
5518 static int
5519 sort_after_uid (const void *p1, const void *p2)
5520 {
5521 const gimple *stmt1 = *(const gimple * const *)p1;
5522 const gimple *stmt2 = *(const gimple * const *)p2;
5523 return gimple_uid (stmt1) - gimple_uid (stmt2);
5524 }
5525
5526 /* Create pattern stmts for all stmts participating in the bool pattern
5527 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
5528 OUT_TYPE. Return the def of the pattern root. */
5529
5530 static tree
5531 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
5532 tree out_type, stmt_vec_info stmt_info)
5533 {
5534 /* Gather original stmts in the bool pattern in their order of appearance
5535 in the IL. */
5536 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
5537 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
5538 i != bool_stmt_set.end (); ++i)
5539 bool_stmts.quick_push (*i);
5540 bool_stmts.qsort (sort_after_uid);
5541
5542 /* Now process them in that order, producing pattern stmts. */
5543 hash_map <tree, tree> defs;
5544 for (unsigned i = 0; i < bool_stmts.length (); ++i)
5545 adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
5546 out_type, stmt_info, defs);
5547
5548 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
5549 gimple *pattern_stmt
5550 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5551 return gimple_assign_lhs (pattern_stmt);
5552 }
5553
5554 /* Return the proper type for converting bool VAR into
5555 an integer value or NULL_TREE if no such type exists.
5556 The type is chosen so that the converted value has the
5557 same number of elements as VAR's vector type. */
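/* For example (illustrative): if VAR is a boolean defined by a comparison
of two 32-bit integers, its recorded mask_precision is typically 32 and the
function returns a 32-bit unsigned integer type, keeping one converted
element per mask element. */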
5558
5559 static tree
5560 integer_type_for_mask (tree var, vec_info *vinfo)
5561 {
5562 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5563 return NULL_TREE;
5564
5565 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
5566 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
5567 return NULL_TREE;
5568
5569 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
5570 }
5571
5572 /* Function vect_recog_gcond_pattern
5573
5574 Try to find a pattern like the following:
5575
5576 if (a op b)
5577
5578 where operator 'op' is not !=, and convert it to an adjusted boolean pattern
5579
5580 mask = a op b
5581 if (mask != 0)
5582
5583 and set the mask type on MASK.
5584
5585 Input:
5586
5587 * STMT_VINFO: The stmt from which the pattern
5588 search begins, i.e. the GIMPLE_COND of a
5589 loop exit.
5590
5591 Output:
5592
5593 * TYPE_OUT: The type of the output of this pattern.
5594
5595 * Return value: A new stmt that will be used to replace the pattern. */
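/* Illustrative example (hypothetical loop, added for exposition): in an
early-break loop such as

for (i = 0; i < n; i++)
if (a[i] > b[i])
break;

the exit condition "if (a[i] > b[i])" is rewritten to

mask = a[i] > b[i];
if (mask != 0)

so that MASK gets a vector mask type and the exit test becomes a test of
the whole mask. */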
5596
5597 static gimple *
5598 vect_recog_gcond_pattern (vec_info *vinfo,
5599 stmt_vec_info stmt_vinfo, tree *type_out)
5600 {
5601 /* Currently we only support this for loop vectorization and only when
5602 the loop has multiple exits. */
5603 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5604 if (!loop_vinfo || !LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
5605 return NULL;
5606
5607 gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
5608 gcond* cond = NULL;
5609 if (!(cond = dyn_cast <gcond *> (last_stmt)))
5610 return NULL;
5611
5612 auto lhs = gimple_cond_lhs (cond);
5613 auto rhs = gimple_cond_rhs (cond);
5614 auto code = gimple_cond_code (cond);
5615
5616 tree scalar_type = TREE_TYPE (lhs);
5617 if (VECTOR_TYPE_P (scalar_type))
5618 return NULL;
5619
5620 if (code == NE_EXPR
5621 && zerop (rhs)
5622 && VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
5623 return NULL;
5624
5625 tree vecitype = get_vectype_for_scalar_type (vinfo, scalar_type);
5626 if (vecitype == NULL_TREE)
5627 return NULL;
5628
5629 tree vectype = truth_type_for (vecitype);
5630
5631 tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
5632 gimple *new_stmt = gimple_build_assign (new_lhs, code, lhs, rhs);
5633 append_pattern_def_seq (vinfo, stmt_vinfo, new_stmt, vectype, scalar_type);
5634
5635 gimple *pattern_stmt
5636 = gimple_build_cond (NE_EXPR, new_lhs,
5637 build_int_cst (TREE_TYPE (new_lhs), 0),
5638 NULL_TREE, NULL_TREE);
5639 *type_out = vectype;
5640 vect_pattern_detected ("vect_recog_gcond_pattern", last_stmt);
5641 return pattern_stmt;
5642 }
5643
5644 /* Function vect_recog_bool_pattern
5645
5646 Try to find a pattern like the following:
5647
5648 bool a_b, b_b, c_b, d_b, e_b;
5649 TYPE f_T;
5650 loop:
5651 S1 a_b = x1 CMP1 y1;
5652 S2 b_b = x2 CMP2 y2;
5653 S3 c_b = a_b & b_b;
5654 S4 d_b = x3 CMP3 y3;
5655 S5 e_b = c_b | d_b;
5656 S6 f_T = (TYPE) e_b;
5657
5658 where type 'TYPE' is an integral type. Or a similar pattern
5659 ending in
5660
5661 S6 f_Y = e_b ? r_Y : s_Y;
5662
5663 as results from if-conversion of a complex condition.
5664
5665 Input:
5666
5667 * STMT_VINFO: The stmt at the end from which the pattern
5668 search begins, i.e. cast of a bool to
5669 an integer type.
5670
5671 Output:
5672
5673 * TYPE_OUT: The type of the output of this pattern.
5674
5675 * Return value: A new stmt that will be used to replace the pattern.
5676
5677 Assuming the size of TYPE is the same as the size of all comparisons
5678 (otherwise some casts would be added where needed), for the above
5679 sequence we create the following related pattern stmts:
5680 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5681 S3' c_T = x2 CMP2 y2 ? a_T : 0;
5682 S4' d_T = x3 CMP3 y3 ? 1 : 0;
5683 S5' e_T = c_T | d_T;
5684 S6' f_T = e_T;
5685
5686 Instead of the above S3' we could emit:
5687 S2' b_T = x2 CMP2 y2 ? 1 : 0;
5688 S3' c_T = a_T | b_T;
5689 but the above is more efficient. */
5690
5691 static gimple *
5692 vect_recog_bool_pattern (vec_info *vinfo,
5693 stmt_vec_info stmt_vinfo, tree *type_out)
5694 {
5695 gimple *last_stmt = stmt_vinfo->stmt;
5696 enum tree_code rhs_code;
5697 tree var, lhs, rhs, vectype;
5698 gimple *pattern_stmt;
5699
5700 if (!is_gimple_assign (last_stmt))
5701 return NULL;
5702
5703 var = gimple_assign_rhs1 (last_stmt);
5704 lhs = gimple_assign_lhs (last_stmt);
5705 rhs_code = gimple_assign_rhs_code (last_stmt);
5706
5707 if (rhs_code == VIEW_CONVERT_EXPR)
5708 var = TREE_OPERAND (var, 0);
5709
5710 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5711 return NULL;
5712
5713 hash_set<gimple *> bool_stmts;
5714
5715 if (CONVERT_EXPR_CODE_P (rhs_code)
5716 || rhs_code == VIEW_CONVERT_EXPR)
5717 {
5718 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
5719 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5720 return NULL;
5721 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5722
5723 if (check_bool_pattern (var, vinfo, bool_stmts))
5724 {
5725 rhs = adjust_bool_stmts (vinfo, bool_stmts,
5726 TREE_TYPE (lhs), stmt_vinfo);
5727 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5728 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
5729 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
5730 else
5731 pattern_stmt
5732 = gimple_build_assign (lhs, NOP_EXPR, rhs);
5733 }
5734 else
5735 {
5736 tree type = integer_type_for_mask (var, vinfo);
5737 tree cst0, cst1, tmp;
5738
5739 if (!type)
5740 return NULL;
5741
5742 /* We may use the cond directly with a narrowed type, avoiding
5743 multiple cond exprs followed by result packing, and instead
5744 perform a single cond on the packed mask. In the widening
5745 case it is better to do the cond first and then extract the
5746 results. */
5747 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
5748 type = TREE_TYPE (lhs);
5749
5750 cst0 = build_int_cst (type, 0);
5751 cst1 = build_int_cst (type, 1);
5752 tmp = vect_recog_temp_ssa_var (type, NULL);
5753 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
5754
5755 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
5756 {
5757 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
5758 append_pattern_def_seq (vinfo, stmt_vinfo,
5759 pattern_stmt, new_vectype);
5760
5761 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5762 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
5763 }
5764 }
5765
5766 *type_out = vectype;
5767 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
5768
5769 return pattern_stmt;
5770 }
5771 else if (rhs_code == COND_EXPR
5772 && TREE_CODE (var) == SSA_NAME)
5773 {
5774 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5775 if (vectype == NULL_TREE)
5776 return NULL;
5777
5778 /* Build a scalar type for the boolean result that when
5779 vectorized matches the vector type of the result in
5780 size and number of elements. */
5781 unsigned prec
5782 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
5783 TYPE_VECTOR_SUBPARTS (vectype));
5784
5785 tree type
5786 = build_nonstandard_integer_type (prec,
5787 TYPE_UNSIGNED (TREE_TYPE (var)));
5788 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
5789 return NULL;
5790
5791 if (check_bool_pattern (var, vinfo, bool_stmts))
5792 var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
5793 else if (integer_type_for_mask (var, vinfo))
5794 return NULL;
5795
5796 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5797 pattern_stmt
5798 = gimple_build_assign (lhs, COND_EXPR,
5799 build2 (NE_EXPR, boolean_type_node,
5800 var, build_int_cst (TREE_TYPE (var), 0)),
5801 gimple_assign_rhs2 (last_stmt),
5802 gimple_assign_rhs3 (last_stmt));
5803 *type_out = vectype;
5804 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
5805
5806 return pattern_stmt;
5807 }
5808 else if (rhs_code == SSA_NAME
5809 && STMT_VINFO_DATA_REF (stmt_vinfo))
5810 {
5811 stmt_vec_info pattern_stmt_info;
5812 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5813 if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
5814 return NULL;
5815
5816 if (check_bool_pattern (var, vinfo, bool_stmts))
5817 rhs = adjust_bool_stmts (vinfo, bool_stmts,
5818 TREE_TYPE (vectype), stmt_vinfo);
5819 else
5820 {
5821 tree type = integer_type_for_mask (var, vinfo);
5822 tree cst0, cst1, new_vectype;
5823
5824 if (!type)
5825 return NULL;
5826
5827 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
5828 type = TREE_TYPE (vectype);
5829
5830 cst0 = build_int_cst (type, 0);
5831 cst1 = build_int_cst (type, 1);
5832 new_vectype = get_vectype_for_scalar_type (vinfo, type);
5833
5834 rhs = vect_recog_temp_ssa_var (type, NULL);
5835 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
5836 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
5837 }
5838
5839 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
5840 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
5841 {
5842 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5843 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
5844 append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
5845 rhs = rhs2;
5846 }
5847 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
5848 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
5849 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
5850 *type_out = vectype;
5851 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
5852
5853 return pattern_stmt;
5854 }
5855 else
5856 return NULL;
5857 }
5858
5859
5860 /* A helper for vect_recog_mask_conversion_pattern. Build
5861 conversion of MASK to a type suitable for masking VECTYPE.
5862 The built statement gets the required vectype and is appended to
5863 STMT_VINFO's pattern sequence.
5864
5865 Return converted mask. */
5866
5867 static tree
5868 build_mask_conversion (vec_info *vinfo,
5869 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
5870 {
5871 gimple *stmt;
5872 tree masktype, tmp;
5873
5874 masktype = truth_type_for (vectype);
5875 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
5876 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
5877 append_pattern_def_seq (vinfo, stmt_vinfo,
5878 stmt, masktype, TREE_TYPE (vectype));
5879
5880 return tmp;
5881 }
5882
5883
5884 /* Function vect_recog_mask_conversion_pattern
5885
5886 Try to find statements which require boolean type
5887 conversion. Additional conversion statements are
5888 added to handle such cases. For example:
5889
5890 bool m_1, m_2, m_3;
5891 int i_4, i_5;
5892 double d_6, d_7;
5893 char c_1, c_2, c_3;
5894
5895 S1 m_1 = i_4 > i_5;
5896 S2 m_2 = d_6 < d_7;
5897 S3 m_3 = m_1 & m_2;
5898 S4 c_1 = m_3 ? c_2 : c_3;
5899
5900 Will be transformed into:
5901
5902 S1 m_1 = i_4 > i_5;
5903 S2 m_2 = d_6 < d_7;
5904 S3'' m_2' = (_Bool[bitsize=32])m_2
5905 S3' m_3' = m_1 & m_2';
5906 S4'' m_3'' = (_Bool[bitsize=8])m_3'
5907 S4' c_1' = m_3'' ? c_2 : c_3; */
5908
5909 static gimple *
5910 vect_recog_mask_conversion_pattern (vec_info *vinfo,
5911 stmt_vec_info stmt_vinfo, tree *type_out)
5912 {
5913 gimple *last_stmt = stmt_vinfo->stmt;
5914 enum tree_code rhs_code;
5915 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
5916 tree vectype1, vectype2;
5917 stmt_vec_info pattern_stmt_info;
5918 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
5919 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
5920
5921 /* Check for MASK_LOAD and MASK_STORE as well as COND_OP calls requiring mask
5922 conversion. */
5923 if (is_gimple_call (last_stmt)
5924 && gimple_call_internal_p (last_stmt))
5925 {
5926 gcall *pattern_stmt;
5927
5928 internal_fn ifn = gimple_call_internal_fn (last_stmt);
5929 int mask_argno = internal_fn_mask_index (ifn);
5930 if (mask_argno < 0)
5931 return NULL;
5932
5933 bool store_p = internal_store_fn_p (ifn);
5934 bool load_p = internal_load_fn_p (ifn);
5935 if (store_p)
5936 {
5937 int rhs_index = internal_fn_stored_value_index (ifn);
5938 tree rhs = gimple_call_arg (last_stmt, rhs_index);
5939 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
5940 }
5941 else
5942 {
5943 lhs = gimple_call_lhs (last_stmt);
5944 if (!lhs)
5945 return NULL;
5946 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5947 }
5948
5949 if (!vectype1)
5950 return NULL;
5951
5952 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
5953 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
5954 if (mask_arg_type)
5955 {
5956 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
5957
5958 if (!vectype2
5959 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
5960 TYPE_VECTOR_SUBPARTS (vectype2)))
5961 return NULL;
5962 }
5963 else if (store_p || load_p)
5964 return NULL;
5965
5966 tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
5967
5968 auto_vec<tree, 8> args;
5969 unsigned int nargs = gimple_call_num_args (last_stmt);
5970 args.safe_grow (nargs, true);
5971 for (unsigned int i = 0; i < nargs; ++i)
5972 args[i] = ((int) i == mask_argno
5973 ? tmp
5974 : gimple_call_arg (last_stmt, i));
5975 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
5976
5977 if (!store_p)
5978 {
5979 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5980 gimple_call_set_lhs (pattern_stmt, lhs);
5981 }
5982
5983 if (load_p || store_p)
5984 gimple_call_set_nothrow (pattern_stmt, true);
5985
5986 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
5987 if (STMT_VINFO_DATA_REF (stmt_vinfo))
5988 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
5989
5990 *type_out = vectype1;
5991 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
5992
5993 return pattern_stmt;
5994 }
5995
5996 if (!is_gimple_assign (last_stmt))
5997 return NULL;
5998
5999 gimple *pattern_stmt;
6000 lhs = gimple_assign_lhs (last_stmt);
6001 rhs1 = gimple_assign_rhs1 (last_stmt);
6002 rhs_code = gimple_assign_rhs_code (last_stmt);
6003
6004 /* Check for cond expression requiring mask conversion. */
6005 if (rhs_code == COND_EXPR)
6006 {
6007 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
6008
6009 if (TREE_CODE (rhs1) == SSA_NAME)
6010 {
6011 rhs1_type = integer_type_for_mask (rhs1, vinfo);
6012 if (!rhs1_type)
6013 return NULL;
6014 }
6015 else if (COMPARISON_CLASS_P (rhs1))
6016 {
6017 /* Check whether we're comparing scalar booleans and (if so)
6018 whether a better mask type exists than the mask associated
6019 with boolean-sized elements. This avoids unnecessary packs
6020 and unpacks if the booleans are set from comparisons of
6021 wider types. E.g. in:
6022
6023 int x1, x2, x3, x4, y1, y2;
6024 ...
6025 bool b1 = (x1 == x2);
6026 bool b2 = (x3 == x4);
6027 ... = b1 == b2 ? y1 : y2;
6028
6029 it is better for b1 and b2 to use the mask type associated
6030 with int elements rather than bool (byte) elements. */
6031 rhs1_op0 = TREE_OPERAND (rhs1, 0);
6032 rhs1_op1 = TREE_OPERAND (rhs1, 1);
6033 if (!rhs1_op0 || !rhs1_op1)
6034 return NULL;
6035 rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
6036 rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
6037
6038 if (!rhs1_op0_type)
6039 rhs1_type = TREE_TYPE (rhs1_op0);
6040 else if (!rhs1_op1_type)
6041 rhs1_type = TREE_TYPE (rhs1_op1);
6042 else if (TYPE_PRECISION (rhs1_op0_type)
6043 != TYPE_PRECISION (rhs1_op1_type))
6044 {
6045 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
6046 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
6047 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
6048 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
6049 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
6050 {
6051 if (abs (tmp0) > abs (tmp1))
6052 rhs1_type = rhs1_op1_type;
6053 else
6054 rhs1_type = rhs1_op0_type;
6055 }
6056 else
6057 rhs1_type = build_nonstandard_integer_type
6058 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
6059 }
6060 else
6061 rhs1_type = rhs1_op0_type;
6062 }
6063 else
6064 return NULL;
6065
6066 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
6067
6068 if (!vectype1 || !vectype2)
6069 return NULL;
6070
6071 /* Continue if a conversion is needed. Also continue if we have
6072 a comparison whose vector type would normally be different from
6073 VECTYPE2 when considered in isolation. In that case we'll
6074 replace the comparison with an SSA name (so that we can record
6075 its vector type) and behave as though the comparison was an SSA
6076 name from the outset. */
6077 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
6078 TYPE_VECTOR_SUBPARTS (vectype2))
6079 && !rhs1_op0_type
6080 && !rhs1_op1_type)
6081 return NULL;
6082
6083 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
6084 in place; we can handle it in vectorizable_condition. This avoids
6085 unnecessary promotion stmts and an increased vectorization factor. */
6086 if (COMPARISON_CLASS_P (rhs1)
6087 && INTEGRAL_TYPE_P (rhs1_type)
6088 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
6089 TYPE_VECTOR_SUBPARTS (vectype2)))
6090 {
6091 enum vect_def_type dt;
6092 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
6093 && dt == vect_external_def
6094 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
6095 && (dt == vect_external_def
6096 || dt == vect_constant_def))
6097 {
6098 tree wide_scalar_type = build_nonstandard_integer_type
6099 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
6100 tree vectype3 = get_vectype_for_scalar_type (vinfo,
6101 wide_scalar_type);
6102 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
6103 return NULL;
6104 }
6105 }
6106
6107 /* If rhs1 is a comparison we need to move it into a
6108 separate statement. */
6109 if (TREE_CODE (rhs1) != SSA_NAME)
6110 {
6111 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
6112 if (rhs1_op0_type
6113 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
6114 rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
6115 vectype2, stmt_vinfo);
6116 if (rhs1_op1_type
6117 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
6118 rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
6119 vectype2, stmt_vinfo);
6120 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
6121 rhs1_op0, rhs1_op1);
6122 rhs1 = tmp;
6123 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
6124 rhs1_type);
6125 }
6126
6127 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
6128 TYPE_VECTOR_SUBPARTS (vectype2)))
6129 tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
6130 else
6131 tmp = rhs1;
6132
6133 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6134 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
6135 gimple_assign_rhs2 (last_stmt),
6136 gimple_assign_rhs3 (last_stmt));
6137
6138 *type_out = vectype1;
6139 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
6140
6141 return pattern_stmt;
6142 }
6143
6144 /* Now check for binary boolean operations requiring conversion for
6145 one of the operands. */
6146 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
6147 return NULL;
6148
6149 if (rhs_code != BIT_IOR_EXPR
6150 && rhs_code != BIT_XOR_EXPR
6151 && rhs_code != BIT_AND_EXPR
6152 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
6153 return NULL;
6154
6155 rhs2 = gimple_assign_rhs2 (last_stmt);
6156
6157 rhs1_type = integer_type_for_mask (rhs1, vinfo);
6158 rhs2_type = integer_type_for_mask (rhs2, vinfo);
6159
6160 if (!rhs1_type || !rhs2_type
6161 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
6162 return NULL;
6163
6164 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
6165 {
6166 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
6167 if (!vectype1)
6168 return NULL;
6169 rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
6170 }
6171 else
6172 {
6173 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
6174 if (!vectype1)
6175 return NULL;
6176 rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
6177 }
6178
6179 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6180 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
6181
6182 *type_out = vectype1;
6183 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
6184
6185 return pattern_stmt;
6186 }
6187
6188 /* STMT_INFO is a load or store. If the load or store is conditional, return
6189 the boolean condition under which it occurs, otherwise return null. */
6190
6191 static tree
6192 vect_get_load_store_mask (stmt_vec_info stmt_info)
6193 {
6194 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
6195 {
6196 gcc_assert (gimple_assign_single_p (def_assign));
6197 return NULL_TREE;
6198 }
6199
6200 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
6201 {
6202 internal_fn ifn = gimple_call_internal_fn (def_call);
6203 int mask_index = internal_fn_mask_index (ifn);
6204 return gimple_call_arg (def_call, mask_index);
6205 }
6206
6207 gcc_unreachable ();
6208 }
6209
6210 /* Return MASK if MASK is suitable for masking an operation on vectors
6211 of type VECTYPE, otherwise convert it into such a form and return
6212 the result. Associate any conversion statements with STMT_INFO's
6213 pattern. */
6214
6215 static tree
6216 vect_convert_mask_for_vectype (tree mask, tree vectype,
6217 stmt_vec_info stmt_info, vec_info *vinfo)
6218 {
6219 tree mask_type = integer_type_for_mask (mask, vinfo);
6220 if (mask_type)
6221 {
6222 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
6223 if (mask_vectype
6224 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
6225 TYPE_VECTOR_SUBPARTS (mask_vectype)))
6226 mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
6227 }
6228 return mask;
6229 }
6230
6231 /* Return the equivalent of:
6232
6233 fold_convert (TYPE, VALUE)
6234
6235 with the expectation that the operation will be vectorized.
6236 If new statements are needed, add them as pattern statements
6237 to STMT_INFO. */
6238
6239 static tree
6240 vect_add_conversion_to_pattern (vec_info *vinfo,
6241 tree type, tree value, stmt_vec_info stmt_info)
6242 {
6243 if (useless_type_conversion_p (type, TREE_TYPE (value)))
6244 return value;
6245
6246 tree new_value = vect_recog_temp_ssa_var (type, NULL);
6247 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
6248 append_pattern_def_seq (vinfo, stmt_info, conversion,
6249 get_vectype_for_scalar_type (vinfo, type));
6250 return new_value;
6251 }
6252
6253 /* Try to convert STMT_INFO into a call to a gather load or scatter store
6254 internal function. Return the final statement on success and set
6255 *TYPE_OUT to the vector type being loaded or stored.
6256
6257 This function only handles gathers and scatters that were recognized
6258 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
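/* Illustrative example (hypothetical arrays; the exact internal function,
offset type and scale depend on the target): an if-converted conditional
gather like

if (c[i]) sum += p[idx[i]];

can be rewritten to a masked gather call of the form

_g = .MASK_GATHER_LOAD (p, (sizetype) idx[i], 4, 0, mask);

with the offset converted to the vector element width and the data
reference moved to the new pattern statement. */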
6259
6260 static gimple *
6261 vect_recog_gather_scatter_pattern (vec_info *vinfo,
6262 stmt_vec_info stmt_info, tree *type_out)
6263 {
6264 /* Currently we only support this for loop vectorization. */
6265 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6266 if (!loop_vinfo)
6267 return NULL;
6268
6269 /* Make sure that we're looking at a gather load or scatter store. */
6270 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
6271 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6272 return NULL;
6273
6274 /* Get the boolean that controls whether the load or store happens.
6275 This is null if the operation is unconditional. */
6276 tree mask = vect_get_load_store_mask (stmt_info);
6277
6278 /* Make sure that the target supports an appropriate internal
6279 function for the gather/scatter operation. */
6280 gather_scatter_info gs_info;
6281 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
6282 || gs_info.ifn == IFN_LAST)
6283 return NULL;
6284
6285 /* Convert the mask to the right form. */
6286 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
6287 gs_info.element_type);
6288 if (mask)
6289 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
6290 loop_vinfo);
6291 else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
6292 || gs_info.ifn == IFN_MASK_GATHER_LOAD
6293 || gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE
6294 || gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
6295 mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
6296
6297 /* Get the invariant base and non-invariant offset, converting the
6298 latter to the same width as the vector elements. */
6299 tree base = gs_info.base;
6300 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
6301 tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
6302 gs_info.offset, stmt_info);
6303
6304 /* Build the new pattern statement. */
6305 tree scale = size_int (gs_info.scale);
6306 gcall *pattern_stmt;
6307 if (DR_IS_READ (dr))
6308 {
6309 tree zero = build_zero_cst (gs_info.element_type);
6310 if (mask != NULL)
6311 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
6312 offset, scale, zero, mask);
6313 else
6314 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
6315 offset, scale, zero);
6316 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
6317 gimple_call_set_lhs (pattern_stmt, load_lhs);
6318 }
6319 else
6320 {
6321 tree rhs = vect_get_store_rhs (stmt_info);
6322 if (mask != NULL)
6323 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
6324 base, offset, scale, rhs,
6325 mask);
6326 else
6327 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
6328 base, offset, scale, rhs);
6329 }
6330 gimple_call_set_nothrow (pattern_stmt, true);
6331
6332 /* Copy across relevant vectorization info and associate DR with the
6333 new pattern statement instead of the original statement. */
6334 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
6335 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
6336
6337 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6338 *type_out = vectype;
6339 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
6340
6341 return pattern_stmt;
6342 }
6343
6344 /* Return true if TYPE is a non-boolean integer type. These are the types
6345 that we want to consider for narrowing. */
6346
6347 static bool
6348 vect_narrowable_type_p (tree type)
6349 {
6350 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
6351 }
6352
6353 /* Return true if the operation given by CODE can be truncated to N bits
6354 when only N bits of the output are needed. This is only true if the
6355 bits of the inputs above the low N bits have no effect on the low N bits of the result. */
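/* For example (illustrative): the low 8 bits of a 32-bit PLUS_EXPR depend
only on the low 8 bits of its operands, so PLUS_EXPR is truncatable,
whereas TRUNC_DIV_EXPR is not: 256 / 3 and 0 / 3 have identical low input
bytes but different low result bytes. */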
6356
6357 static bool
6358 vect_truncatable_operation_p (tree_code code)
6359 {
6360 switch (code)
6361 {
6362 case PLUS_EXPR:
6363 case MINUS_EXPR:
6364 case MULT_EXPR:
6365 case BIT_AND_EXPR:
6366 case BIT_IOR_EXPR:
6367 case BIT_XOR_EXPR:
6368 case COND_EXPR:
6369 return true;
6370
6371 default:
6372 return false;
6373 }
6374 }
6375
6376 /* Record that STMT_INFO could be changed from operating on TYPE to
6377 operating on a type with the precision and sign given by PRECISION
6378 and SIGN respectively. PRECISION is an arbitrary bit precision;
6379 it might not be a whole number of bytes. */
6380
6381 static void
6382 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
6383 unsigned int precision, signop sign)
6384 {
6385 /* Round the precision up to a whole number of bytes. */
6386 precision = vect_element_precision (precision);
6387 if (precision < TYPE_PRECISION (type)
6388 && (!stmt_info->operation_precision
6389 || stmt_info->operation_precision > precision))
6390 {
6391 stmt_info->operation_precision = precision;
6392 stmt_info->operation_sign = sign;
6393 }
6394 }
6395
6396 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
6397 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
6398 is an arbitrary bit precision; it might not be a whole number of bytes. */
6399
6400 static void
6401 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
6402 unsigned int min_input_precision)
6403 {
6404 /* This operation in isolation only requires the inputs to have
6405 MIN_INPUT_PRECISION of precision. However, that doesn't mean
6406 that MIN_INPUT_PRECISION is a natural precision for the chain
6407 as a whole. E.g. consider something like:
6408
6409 unsigned short *x, *y;
6410 *y = ((*x & 0xf0) >> 4) | (*y << 4);
6411
6412 The right shift can be done on unsigned chars, and only requires the
6413 result of "*x & 0xf0" to be done on unsigned chars. But taking that
6414 approach would mean turning a natural chain of single-vector unsigned
6415 short operations into one that truncates "*x" and then extends
6416 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
6417 operation and one vector for each unsigned char operation.
6418 This would be a significant pessimization.
6419
6420 Instead only propagate the maximum of this precision and the precision
6421 required by the users of the result. This means that we don't pessimize
6422 the case above but continue to optimize things like:
6423
6424 unsigned char *y;
6425 unsigned short *x;
6426 *y = ((*x & 0xf0) >> 4) | (*y << 4);
6427
6428 Here we would truncate two vectors of *x to a single vector of
6429 unsigned chars and use single-vector unsigned char operations for
6430 everything else, rather than doing two unsigned short copies of
6431 "(*x & 0xf0) >> 4" and then truncating the result. */
6432 min_input_precision = MAX (min_input_precision,
6433 stmt_info->min_output_precision);
6434
6435 if (min_input_precision < TYPE_PRECISION (type)
6436 && (!stmt_info->min_input_precision
6437 || stmt_info->min_input_precision > min_input_precision))
6438 stmt_info->min_input_precision = min_input_precision;
6439 }
6440
6441 /* Subroutine of vect_determine_min_output_precision. Return true if
6442 we can calculate a reduced number of output bits for STMT_INFO,
6443 whose result is LHS. */
6444
6445 static bool
6446 vect_determine_min_output_precision_1 (vec_info *vinfo,
6447 stmt_vec_info stmt_info, tree lhs)
6448 {
6449 /* Take the maximum precision required by users of the result. */
6450 unsigned int precision = 0;
6451 imm_use_iterator iter;
6452 use_operand_p use;
6453 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
6454 {
6455 gimple *use_stmt = USE_STMT (use);
6456 if (is_gimple_debug (use_stmt))
6457 continue;
6458 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
6459 if (!use_stmt_info || !use_stmt_info->min_input_precision)
6460 return false;
6461 /* The input precision recorded for COND_EXPRs applies only to the
6462 "then" and "else" values. */
6463 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
6464 if (assign
6465 && gimple_assign_rhs_code (assign) == COND_EXPR
6466 && use->use != gimple_assign_rhs2_ptr (assign)
6467 && use->use != gimple_assign_rhs3_ptr (assign))
6468 return false;
6469 precision = MAX (precision, use_stmt_info->min_input_precision);
6470 }
6471
6472 if (dump_enabled_p ())
6473 dump_printf_loc (MSG_NOTE, vect_location,
6474 "only the low %d bits of %T are significant\n",
6475 precision, lhs);
6476 stmt_info->min_output_precision = precision;
6477 return true;
6478 }
6479
6480 /* Calculate min_output_precision for STMT_INFO. */
6481
6482 static void
6483 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
6484 {
6485 /* We're only interested in statements with a narrowable result. */
6486 tree lhs = gimple_get_lhs (stmt_info->stmt);
6487 if (!lhs
6488 || TREE_CODE (lhs) != SSA_NAME
6489 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
6490 return;
6491
6492 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
6493 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
6494 }
6495
6496 /* Use range information to decide whether STMT (described by STMT_INFO)
6497 could be done in a narrower type. This is effectively a forward
6498 propagation, since it uses context-independent information that applies
6499 to all users of an SSA name. */
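/* For instance (illustrative): if range information shows that the result
of "int s = a + b" is always in [0, 510], only 9 value bits are needed, so
the statement is recorded as performable as an unsigned operation with
precision 9 (later rounded up to a 16-bit vector element). */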
6500
6501 static void
6502 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
6503 {
6504 tree lhs = gimple_assign_lhs (stmt);
6505 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
6506 return;
6507
6508 tree type = TREE_TYPE (lhs);
6509 if (!vect_narrowable_type_p (type))
6510 return;
6511
6512 /* First see whether we have any useful range information for the result. */
6513 unsigned int precision = TYPE_PRECISION (type);
6514 signop sign = TYPE_SIGN (type);
6515 wide_int min_value, max_value;
6516 if (!vect_get_range_info (lhs, &min_value, &max_value))
6517 return;
6518
6519 tree_code code = gimple_assign_rhs_code (stmt);
6520 unsigned int nops = gimple_num_ops (stmt);
6521
6522 if (!vect_truncatable_operation_p (code))
6523 /* Check that all relevant input operands are compatible, and update
6524 [MIN_VALUE, MAX_VALUE] to include their ranges. */
6525 for (unsigned int i = 1; i < nops; ++i)
6526 {
6527 tree op = gimple_op (stmt, i);
6528 if (TREE_CODE (op) == INTEGER_CST)
6529 {
6530 /* Don't require the integer to have TYPE (which it might
6531 not for things like shift amounts, etc.), but do require it
6532 to fit TYPE. */
6533 if (!int_fits_type_p (op, type))
6534 return;
6535
6536 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
6537 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
6538 }
6539 else if (TREE_CODE (op) == SSA_NAME)
6540 {
6541 /* Ignore codes that don't take uniform arguments. */
6542 if (!types_compatible_p (TREE_TYPE (op), type))
6543 return;
6544
6545 wide_int op_min_value, op_max_value;
6546 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
6547 return;
6548
6549 min_value = wi::min (min_value, op_min_value, sign);
6550 max_value = wi::max (max_value, op_max_value, sign);
6551 }
6552 else
6553 return;
6554 }
6555
6556 /* Try to switch signed types for unsigned types if we can.
6557 This is better for two reasons. First, unsigned ops tend
6558 to be cheaper than signed ops. Second, it means that we can
6559 handle things like:
6560
6561 signed char c;
6562 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
6563
6564 as:
6565
6566 signed char c;
6567 unsigned short res_1 = (unsigned short) c & 0xff00;
6568 int res = (int) res_1;
6569
6570 where the intermediate result res_1 has unsigned rather than
6571 signed type. */
6572 if (sign == SIGNED && !wi::neg_p (min_value))
6573 sign = UNSIGNED;
6574
6575 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
6576 unsigned int precision1 = wi::min_precision (min_value, sign);
6577 unsigned int precision2 = wi::min_precision (max_value, sign);
6578 unsigned int value_precision = MAX (precision1, precision2);
6579 if (value_precision >= precision)
6580 return;
6581
6582 if (dump_enabled_p ())
6583 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
6584 " without loss of precision: %G",
6585 sign == SIGNED ? "signed" : "unsigned",
6586 value_precision, (gimple *) stmt);
6587
6588 vect_set_operation_type (stmt_info, type, value_precision, sign);
6589 vect_set_min_input_precision (stmt_info, type, value_precision);
6590 }
6591
6592 /* Use information about the users of STMT's result to decide whether
6593 STMT (described by STMT_INFO) could be done in a narrower type.
6594 This is effectively a backward propagation. */
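/* For example (illustrative): for "z = y >> 4" whose users only need the
low 8 bits of Z (min_output_precision == 8), the shift itself must be done
with 8 + 4 = 12 bits, so operation_precision and min_input_precision both
become 12 (the operation precision is rounded up to 16 when recorded). */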
6595
6596 static void
6597 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
6598 {
6599 tree_code code = gimple_assign_rhs_code (stmt);
6600 unsigned int opno = (code == COND_EXPR ? 2 : 1);
6601 tree type = TREE_TYPE (gimple_op (stmt, opno));
6602 if (!vect_narrowable_type_p (type))
6603 return;
6604
6605 unsigned int precision = TYPE_PRECISION (type);
6606 unsigned int operation_precision, min_input_precision;
6607 switch (code)
6608 {
6609 CASE_CONVERT:
6610 /* Only the bits that contribute to the output matter. Don't change
6611 the precision of the operation itself. */
6612 operation_precision = precision;
6613 min_input_precision = stmt_info->min_output_precision;
6614 break;
6615
6616 case LSHIFT_EXPR:
6617 case RSHIFT_EXPR:
6618 {
6619 tree shift = gimple_assign_rhs2 (stmt);
6620 if (TREE_CODE (shift) != INTEGER_CST
6621 || !wi::ltu_p (wi::to_widest (shift), precision))
6622 return;
6623 unsigned int const_shift = TREE_INT_CST_LOW (shift);
6624 if (code == LSHIFT_EXPR)
6625 {
6626 /* Avoid creating an undefined shift.
6627
6628 ??? We could instead use min_output_precision as-is and
6629 optimize out-of-range shifts to zero. However, only
6630 degenerate testcases shift away all their useful input data,
6631 and it isn't natural to drop input operations in the middle
6632 of vectorization. This sort of thing should really be
6633 handled before vectorization. */
6634 operation_precision = MAX (stmt_info->min_output_precision,
6635 const_shift + 1);
6636 /* We need CONST_SHIFT fewer bits of the input. */
6637 min_input_precision = (MAX (operation_precision, const_shift)
6638 - const_shift);
6639 }
6640 else
6641 {
6642 /* We need CONST_SHIFT extra bits to do the operation. */
6643 operation_precision = (stmt_info->min_output_precision
6644 + const_shift);
6645 min_input_precision = operation_precision;
6646 }
6647 break;
6648 }
6649
6650 default:
6651 if (vect_truncatable_operation_p (code))
6652 {
6653 /* Input bit N has no effect on output bits N-1 and lower. */
6654 operation_precision = stmt_info->min_output_precision;
6655 min_input_precision = operation_precision;
6656 break;
6657 }
6658 return;
6659 }
6660
6661 if (operation_precision < precision)
6662 {
6663 if (dump_enabled_p ())
6664 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
6665 " without affecting users: %G",
6666 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
6667 operation_precision, (gimple *) stmt);
6668 vect_set_operation_type (stmt_info, type, operation_precision,
6669 TYPE_SIGN (type));
6670 }
6671 vect_set_min_input_precision (stmt_info, type, min_input_precision);
6672 }
6673
6674 /* Return true if the statement described by STMT_INFO sets a boolean
6675 SSA_NAME and if we know how to vectorize this kind of statement using
6676 vector mask types. */
6677
6678 static bool
6679 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
6680 {
6681 tree lhs = gimple_get_lhs (stmt_info->stmt);
6682 tree_code code = ERROR_MARK;
6683 gassign *assign = NULL;
6684 gcond *cond = NULL;
6685
6686 if ((assign = dyn_cast <gassign *> (stmt_info->stmt)))
6687 code = gimple_assign_rhs_code (assign);
6688 else if ((cond = dyn_cast <gcond *> (stmt_info->stmt)))
6689 {
6690 lhs = gimple_cond_lhs (cond);
6691 code = gimple_cond_code (cond);
6692 }
6693
6694 if (!lhs
6695 || TREE_CODE (lhs) != SSA_NAME
6696 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
6697 return false;
6698
6699 if (code != ERROR_MARK)
6700 {
6701 switch (code)
6702 {
6703 CASE_CONVERT:
6704 case SSA_NAME:
6705 case BIT_NOT_EXPR:
6706 case BIT_IOR_EXPR:
6707 case BIT_XOR_EXPR:
6708 case BIT_AND_EXPR:
6709 return true;
6710
6711 default:
6712 return TREE_CODE_CLASS (code) == tcc_comparison;
6713 }
6714 }
6715 else if (is_a <gphi *> (stmt_info->stmt))
6716 return true;
6717 return false;
6718 }
6719
6720 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
6721 a vector mask type instead of a normal vector type. Record the
6722 result in STMT_INFO->mask_precision. */
6723
6724 static void
6725 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
6726 {
6727 if (!possible_vector_mask_operation_p (stmt_info))
6728 return;
6729
6730 /* If at least one boolean input uses a vector mask type,
6731 pick the mask type with the narrowest elements.
6732
6733 ??? This is the traditional behavior. It should always produce
6734 the smallest number of operations, but isn't necessarily the
6735 optimal choice. For example, if we have:
6736
6737 a = b & c
6738
6739 where:
6740
6741 - the user of a wants it to have a mask type for 16-bit elements (M16)
6742 - b also uses M16
6743 - c uses a mask type for 8-bit elements (M8)
6744
6745 then picking M8 gives:
6746
6747 - 1 M16->M8 pack for b
6748 - 1 M8 AND for a
6749 - 2 M8->M16 unpacks for the user of a
6750
6751 whereas picking M16 would have given:
6752
6753 - 2 M8->M16 unpacks for c
6754 - 2 M16 ANDs for a
6755
6756 The number of operations is the same, but M16 would have given
6757 a shorter dependency chain and allowed more ILP. */
6758 unsigned int precision = ~0U;
6759 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6760
6761 /* If the statement compares two values that shouldn't use vector masks,
6762 try comparing the values as normal scalars instead. */
6763 tree_code code = ERROR_MARK;
6764 tree op0_type;
6765 unsigned int nops = -1;
6766 unsigned int ops_start = 0;
6767
6768 if (gassign *assign = dyn_cast <gassign *> (stmt))
6769 {
6770 code = gimple_assign_rhs_code (assign);
6771 op0_type = TREE_TYPE (gimple_assign_rhs1 (assign));
6772 nops = gimple_num_ops (assign);
6773 ops_start = 1;
6774 }
6775 else if (gcond *cond = dyn_cast <gcond *> (stmt))
6776 {
6777 code = gimple_cond_code (cond);
6778 op0_type = TREE_TYPE (gimple_cond_lhs (cond));
6779 nops = 2;
6780 ops_start = 0;
6781 }
6782
6783 if (code != ERROR_MARK)
6784 {
6785 for (unsigned int i = ops_start; i < nops; ++i)
6786 {
6787 tree rhs = gimple_op (stmt, i);
6788 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
6789 continue;
6790
6791 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
6792 if (!def_stmt_info)
6793 /* Don't let external or constant operands influence the choice.
6794 We can convert them to whichever vector type we pick. */
6795 continue;
6796
6797 if (def_stmt_info->mask_precision)
6798 {
6799 if (precision > def_stmt_info->mask_precision)
6800 precision = def_stmt_info->mask_precision;
6801 }
6802 }
6803
6804 if (precision == ~0U
6805 && TREE_CODE_CLASS (code) == tcc_comparison)
6806 {
6807 scalar_mode mode;
6808 tree vectype, mask_type;
6809 if (is_a <scalar_mode> (TYPE_MODE (op0_type), &mode)
6810 && (vectype = get_vectype_for_scalar_type (vinfo, op0_type))
6811 && (mask_type = get_mask_type_for_scalar_type (vinfo, op0_type))
6812 && expand_vec_cmp_expr_p (vectype, mask_type, code))
6813 precision = GET_MODE_BITSIZE (mode);
6814 }
6815 }
6816 else
6817 {
6818 gphi *phi = as_a <gphi *> (stmt_info->stmt);
6819 for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
6820 {
6821 tree rhs = gimple_phi_arg_def (phi, i);
6822
6823 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
6824 if (!def_stmt_info)
6825 /* Don't let external or constant operands influence the choice.
6826 We can convert them to whichever vector type we pick. */
6827 continue;
6828
6829 if (def_stmt_info->mask_precision)
6830 {
6831 if (precision > def_stmt_info->mask_precision)
6832 precision = def_stmt_info->mask_precision;
6833 }
6834 }
6835 }
6836
6837 if (dump_enabled_p ())
6838 {
6839 if (precision == ~0U)
6840 dump_printf_loc (MSG_NOTE, vect_location,
6841 "using normal nonmask vectors for %G",
6842 stmt_info->stmt);
6843 else
6844 dump_printf_loc (MSG_NOTE, vect_location,
6845 "using boolean precision %d for %G",
6846 precision, stmt_info->stmt);
6847 }
6848
6849 stmt_info->mask_precision = precision;
6850 }
6851
6852 /* Handle vect_determine_precisions for STMT_INFO, given that we
6853 have already done so for the users of its result. */
6854
6855 void
6856 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
6857 {
6858 vect_determine_min_output_precision (vinfo, stmt_info);
6859 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
6860 {
6861 vect_determine_precisions_from_range (stmt_info, stmt);
6862 vect_determine_precisions_from_users (stmt_info, stmt);
6863 }
6864 }
6865
6866 /* Walk backwards through the vectorizable region to determine the
6867 values of these fields:
6868
6869 - min_output_precision
6870 - min_input_precision
6871 - operation_precision
6872 - operation_sign. */
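/* Mask precisions are computed first, in a forward walk over the region
   (vect_determine_mask_precision); the remaining fields are then computed
   in a backward walk (vect_determine_stmt_precisions). */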
6873
6874 void
6875 vect_determine_precisions (vec_info *vinfo)
6876 {
6877 DUMP_VECT_SCOPE ("vect_determine_precisions");
6878
6879 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
6880 {
6881 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6882 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
6883 unsigned int nbbs = loop->num_nodes;
6884
6885 for (unsigned int i = 0; i < nbbs; i++)
6886 {
6887 basic_block bb = bbs[i];
6888 for (auto gsi = gsi_start_phis (bb);
6889 !gsi_end_p (gsi); gsi_next (&gsi))
6890 {
6891 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6892 if (stmt_info)
6893 vect_determine_mask_precision (vinfo, stmt_info);
6894 }
6895 for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
6896 if (!is_gimple_debug (gsi_stmt (si)))
6897 vect_determine_mask_precision
6898 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
6899 }
6900 for (unsigned int i = 0; i < nbbs; i++)
6901 {
6902 basic_block bb = bbs[nbbs - i - 1];
6903 for (gimple_stmt_iterator si = gsi_last_bb (bb);
6904 !gsi_end_p (si); gsi_prev (&si))
6905 if (!is_gimple_debug (gsi_stmt (si)))
6906 vect_determine_stmt_precisions
6907 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
6908 for (auto gsi = gsi_start_phis (bb);
6909 !gsi_end_p (gsi); gsi_next (&gsi))
6910 {
6911 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6912 if (stmt_info)
6913 vect_determine_stmt_precisions (vinfo, stmt_info);
6914 }
6915 }
6916 }
6917 else
6918 {
6919 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
6920 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
6921 {
6922 basic_block bb = bb_vinfo->bbs[i];
6923 for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6924 {
6925 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6926 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6927 vect_determine_mask_precision (vinfo, stmt_info);
6928 }
6929 for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6930 {
6931 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
6932 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6933 vect_determine_mask_precision (vinfo, stmt_info);
6934 }
6935 }
6936 for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
6937 {
6938 for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
6939 !gsi_end_p (gsi); gsi_prev (&gsi))
6940 {
6941 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
6942 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6943 vect_determine_stmt_precisions (vinfo, stmt_info);
6944 }
6945 for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
6946 !gsi_end_p (gsi); gsi_next (&gsi))
6947 {
6948 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6949 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6950 vect_determine_stmt_precisions (vinfo, stmt_info);
6951 }
6952 }
6953 }
6954 }
6955
6956 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
6957
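/* A pattern recognizer: FN tries to replace a statement with an
   equivalent pattern statement, and NAME is the name used for the
   pattern in dump messages. */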
6958 struct vect_recog_func
6959 {
6960 vect_recog_func_ptr fn;
6961 const char *name;
6962 };
6963
6964 /* Note that ordering matters - the first pattern matching on a stmt is
6965 taken, which means the more complex patterns usually need to precede the
6966 less complex ones (widen_sum only after dot_prod or sad, for example). */
6967 static vect_recog_func vect_vect_recog_func_ptrs[] = {
6968 { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
6969 { vect_recog_bit_insert_pattern, "bit_insert" },
6970 { vect_recog_abd_pattern, "abd" },
6971 { vect_recog_over_widening_pattern, "over_widening" },
6972 /* Must come after over_widening, which narrows the shift as much as
6973 possible beforehand. */
6974 { vect_recog_average_pattern, "average" },
6975 { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
6976 { vect_recog_mulhs_pattern, "mult_high" },
6977 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
6978 { vect_recog_widen_mult_pattern, "widen_mult" },
6979 { vect_recog_dot_prod_pattern, "dot_prod" },
6980 { vect_recog_sad_pattern, "sad" },
6981 { vect_recog_widen_sum_pattern, "widen_sum" },
6982 { vect_recog_pow_pattern, "pow" },
6983 { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" },
6984 { vect_recog_ctz_ffs_pattern, "ctz_ffs" },
6985 { vect_recog_widen_shift_pattern, "widen_shift" },
6986 { vect_recog_rotate_pattern, "rotate" },
6987 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
6988 { vect_recog_divmod_pattern, "divmod" },
6989 { vect_recog_mult_pattern, "mult" },
6990 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
6991 { vect_recog_gcond_pattern, "gcond" },
6992 { vect_recog_bool_pattern, "bool" },
6993 /* This must come before mask conversion, and includes the parts
6994 of mask conversion that are needed for gather and scatter
6995 internal functions. */
6996 { vect_recog_gather_scatter_pattern, "gather_scatter" },
6997 { vect_recog_mask_conversion_pattern, "mask_conversion" },
6998 { vect_recog_widen_plus_pattern, "widen_plus" },
6999 { vect_recog_widen_minus_pattern, "widen_minus" },
7000 { vect_recog_widen_abd_pattern, "widen_abd" },
7001 /* These must come after the double widening ones. */
7002 };
7003
7004 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
7005
7006 /* Mark statements that are involved in a pattern. */
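/* ORIG_STMT_INFO is the statement being replaced, PATTERN_STMT the main
   statement of the replacement pattern and PATTERN_VECTYPE its vector
   type (a summary inferred from the function body below). */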
7007
7008 void
7009 vect_mark_pattern_stmts (vec_info *vinfo,
7010 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
7011 tree pattern_vectype)
7012 {
7013 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
7014 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
7015
7016 gimple *orig_pattern_stmt = NULL;
7017 if (is_pattern_stmt_p (orig_stmt_info))
7018 {
7019 /* We're replacing a statement in an existing pattern definition
7020 sequence. */
7021 orig_pattern_stmt = orig_stmt_info->stmt;
7022 if (dump_enabled_p ())
7023 dump_printf_loc (MSG_NOTE, vect_location,
7024 "replacing earlier pattern %G", orig_pattern_stmt);
7025
7026 /* To keep the book-keeping simple, just swap the lhs of the
7027 old and new statements, so that the old one has a valid but
7028 unused lhs. */
7029 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
7030 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
7031 gimple_set_lhs (pattern_stmt, old_lhs);
7032
7033 if (dump_enabled_p ())
7034 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
7035
7036 /* Switch to the statement that ORIG replaces. */
7037 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
7038
7039 /* We shouldn't be replacing the main pattern statement. */
7040 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
7041 != orig_pattern_stmt);
7042 }
7043
7044 if (def_seq)
7045 for (gimple_stmt_iterator si = gsi_start (def_seq);
7046 !gsi_end_p (si); gsi_next (&si))
7047 {
7048 if (dump_enabled_p ())
7049 dump_printf_loc (MSG_NOTE, vect_location,
7050 "extra pattern stmt: %G", gsi_stmt (si));
7051 stmt_vec_info pattern_stmt_info
7052 = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
7053 orig_stmt_info, pattern_vectype);
7054 /* Stmts in the def sequence are not vectorizable cycle or
7055 induction defs; instead they should all be vect_internal_def,
7056 feeding the main pattern stmt, which retains this def type. */
7057 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
7058 }
7059
7060 if (orig_pattern_stmt)
7061 {
7062 vect_init_pattern_stmt (vinfo, pattern_stmt,
7063 orig_stmt_info, pattern_vectype);
7064
7065 /* Insert all the new pattern statements before the original one. */
7066 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
7067 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
7068 orig_def_seq);
7069 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
7070 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
7071
7072 /* Remove the pattern statement that this new pattern replaces. */
7073 gsi_remove (&gsi, false);
7074 }
7075 else
7076 vect_set_pattern_stmt (vinfo,
7077 pattern_stmt, orig_stmt_info, pattern_vectype);
7078
7079 /* If the pattern statement is a conditional, mark it as vect_condition_def. */
7080 if (is_a <gcond *> (pattern_stmt))
7081 STMT_VINFO_DEF_TYPE (STMT_VINFO_RELATED_STMT (orig_stmt_info)) = vect_condition_def;
7082
7083 /* Transfer reduction path info to the pattern. */
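/* That is, propagate STMT_VINFO_REDUC_IDX so that each pattern statement
   records which of its operands continues the reduction chain. */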
7084 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
7085 {
7086 gimple_match_op op;
7087 if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
7088 gcc_unreachable ();
7089 tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
7090 /* Search the pattern def sequence and the main pattern stmt. Note
7091 that we may have inserted everything into a containing pattern def
7092 sequence, so the following is a bit awkward. */
7093 gimple_stmt_iterator si;
7094 gimple *s;
7095 if (def_seq)
7096 {
7097 si = gsi_start (def_seq);
7098 s = gsi_stmt (si);
7099 gsi_next (&si);
7100 }
7101 else
7102 {
7103 si = gsi_none ();
7104 s = pattern_stmt;
7105 }
7106 do
7107 {
7108 bool found = false;
7109 if (gimple_extract_op (s, &op))
7110 for (unsigned i = 0; i < op.num_ops; ++i)
7111 if (op.ops[i] == lookfor)
7112 {
7113 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
7114 lookfor = gimple_get_lhs (s);
7115 found = true;
7116 break;
7117 }
7118 if (s == pattern_stmt)
7119 {
7120 if (!found && dump_enabled_p ())
7121 dump_printf_loc (MSG_NOTE, vect_location,
7122 "failed to update reduction index.\n");
7123 break;
7124 }
7125 if (gsi_end_p (si))
7126 s = pattern_stmt;
7127 else
7128 {
7129 s = gsi_stmt (si);
7130 if (s == pattern_stmt)
7131 /* Found the end inside a bigger pattern def seq. */
7132 si = gsi_none ();
7133 else
7134 gsi_next (&si);
7135 }
7136 } while (1);
7137 }
7138 }
7139
7140 /* Function vect_pattern_recog_1
7141
7142 Input:
7143 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
7144 computation pattern.
7145 STMT_INFO: A stmt from which the pattern search should start.
7146
7147 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
7148 a sequence of statements that has the same functionality and can be
7149 used to replace STMT_INFO. It returns the last statement in the sequence
7150 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
7151 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
7152 statement, having first checked that the target supports the new operation
7153 in that type.
7154
7155 This function also does some bookkeeping, as explained in the documentation
7156 for vect_pattern_recog. */
7157
7158 static void
7159 vect_pattern_recog_1 (vec_info *vinfo,
7160 vect_recog_func *recog_func, stmt_vec_info stmt_info)
7161 {
7162 gimple *pattern_stmt;
7163 loop_vec_info loop_vinfo;
7164 tree pattern_vectype;
7165
7166 /* If this statement has already been replaced with pattern statements,
7167 leave the original statement alone, since the first match wins.
7168 Instead try to match against the definition statements that feed
7169 the main pattern statement. */
7170 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7171 {
7172 gimple_stmt_iterator gsi;
7173 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
7174 !gsi_end_p (gsi); gsi_next (&gsi))
7175 vect_pattern_recog_1 (vinfo, recog_func,
7176 vinfo->lookup_stmt (gsi_stmt (gsi)));
7177 return;
7178 }
7179
7180 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
7181 pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
7182 if (!pattern_stmt)
7183 {
7184 /* Clear any half-formed pattern definition sequence. */
7185 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
7186 return;
7187 }
7188
7189 loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7190
7191 /* Found a vectorizable pattern. */
7192 if (dump_enabled_p ())
7193 dump_printf_loc (MSG_NOTE, vect_location,
7194 "%s pattern recognized: %G",
7195 recog_func->name, pattern_stmt);
7196
7197 /* Mark the stmts that are involved in the pattern. */
7198 vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
7199
7200 /* Patterns cannot be vectorized using SLP, because they change the order of
7201 computation. */
7202 if (loop_vinfo)
7203 {
7204 unsigned ix, ix2;
7205 stmt_vec_info *elem_ptr;
7206 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
7207 elem_ptr, *elem_ptr == stmt_info);
7208 }
7209 }
7210
7211
7212 /* Function vect_pattern_recog
7213
7214 Input:
7215 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
7216 computation idioms.
7217
7218 Output - for each computation idiom that is detected we create a new stmt
7219 that provides the same functionality and that can be vectorized. We
7220 also record some information in the stmt_vec_info of the relevant
7221 stmts, as explained below:
7222
7223 At the entry to this function we have the following stmts, with the
7224 following initial value in the STMT_VINFO fields:
7225
7226 stmt in_pattern_p related_stmt vec_stmt
7227 S1: a_i = .... - - -
7228 S2: a_2 = ..use(a_i).. - - -
7229 S3: a_1 = ..use(a_2).. - - -
7230 S4: a_0 = ..use(a_1).. - - -
7231 S5: ... = ..use(a_0).. - - -
7232
7233 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
7234 represented by a single stmt. We then:
7235 - create a new stmt S6 equivalent to the pattern (the stmt is not
7236 inserted into the code)
7237 - fill in the STMT_VINFO fields as follows:
7238
7239 in_pattern_p related_stmt vec_stmt
7240 S1: a_i = .... - - -
7241 S2: a_2 = ..use(a_i).. - - -
7242 S3: a_1 = ..use(a_2).. - - -
7243 S4: a_0 = ..use(a_1).. true S6 -
7244 '---> S6: a_new = .... - S4 -
7245 S5: ... = ..use(a_0).. - - -
7246
7247 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
7248 to each other through the RELATED_STMT field).
7249
7250 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
7251 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
7252 remain irrelevant unless used by stmts other than S4.
7253
7254 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
7255 (because they are marked as irrelevant). It will vectorize S6, and record
7256 a pointer to the new vector stmt VS6 from S6 (as usual).
7257 S4 will be skipped, and S5 will be vectorized as usual:
7258
7259 in_pattern_p related_stmt vec_stmt
7260 S1: a_i = .... - - -
7261 S2: a_2 = ..use(a_i).. - - -
7262 S3: a_1 = ..use(a_2).. - - -
7263 > VS6: va_new = .... - - -
7264 S4: a_0 = ..use(a_1).. true S6 VS6
7265 '---> S6: a_new = .... - S4 VS6
7266 > VS5: ... = ..vuse(va_new).. - - -
7267 S5: ... = ..use(a_0).. - - -
7268
7269 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
7270 elsewhere), and we'll end up with:
7271
7272 VS6: va_new = ....
7273 VS5: ... = ..vuse(va_new)..
7274
7275 In case of more than one pattern statements, e.g., widen-mult with
7276 intermediate type:
7277
7278 S1 a_t = ;
7279 S2 a_T = (TYPE) a_t;
7280 '--> S3: a_it = (interm_type) a_t;
7281 S4 prod_T = a_T * CONST;
7282 '--> S5: prod_T' = a_it w* CONST;
7283
7284 there may be other users of a_T outside the pattern. In that case S2 will
7285 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
7286 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
7287 be recorded in S3. */
7288
7289 void
7290 vect_pattern_recog (vec_info *vinfo)
7291 {
7292 class loop *loop;
7293 basic_block *bbs;
7294 unsigned int nbbs;
7295 gimple_stmt_iterator si;
7296 unsigned int i, j;
7297
7298 vect_determine_precisions (vinfo);
7299
7300 DUMP_VECT_SCOPE ("vect_pattern_recog");
7301
7302 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
7303 {
7304 loop = LOOP_VINFO_LOOP (loop_vinfo);
7305 bbs = LOOP_VINFO_BBS (loop_vinfo);
7306 nbbs = loop->num_nodes;
7307
7308 /* Scan through the loop stmts, applying the pattern recognition
7309 functions starting at each stmt visited: */
7310 for (i = 0; i < nbbs; i++)
7311 {
7312 basic_block bb = bbs[i];
7313 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
7314 {
7315 if (is_gimple_debug (gsi_stmt (si)))
7316 continue;
7317 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
7318 /* Scan over all generic vect_recog_xxx_pattern functions. */
7319 for (j = 0; j < NUM_PATTERNS; j++)
7320 vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
7321 stmt_info);
7322 }
7323 }
7324 }
7325 else
7326 {
7327 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
7328 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
7329 for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
7330 !gsi_end_p (gsi); gsi_next (&gsi))
7331 {
7332 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
7333 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
7334 continue;
7335
7336 /* Scan over all generic vect_recog_xxx_pattern functions. */
7337 for (j = 0; j < NUM_PATTERNS; j++)
7338 vect_pattern_recog_1 (vinfo,
7339 &vect_vect_recog_func_ptrs[j], stmt_info);
7340 }
7341 }
7342
7343 /* After this no more add_stmt calls are allowed. */
7344 vinfo->stmt_vec_info_ro = true;
7345 }
7346
7347 /* Build a GIMPLE_ASSIGN or GIMPLE_CALL with the tree_code or
7348 internal_fn contained in CH, respectively. */
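/* For example (illustration only): a tree_code such as PLUS_EXPR yields
   LHS = OP0 + OP1, while an internal function such as IFN_SQRT with
   OP1 == NULL_TREE yields the call LHS = .SQRT (OP0). */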
7349 gimple *
7350 vect_gimple_build (tree lhs, code_helper ch, tree op0, tree op1)
7351 {
7352 gcc_assert (op0 != NULL_TREE);
7353 if (ch.is_tree_code ())
7354 return gimple_build_assign (lhs, (tree_code) ch, op0, op1);
7355
7356 gcc_assert (ch.is_internal_fn ());
7357 gimple* stmt = gimple_build_call_internal (as_internal_fn ((combined_fn) ch),
7358 op1 == NULL_TREE ? 1 : 2,
7359 op0, op1);
7360 gimple_call_set_lhs (stmt, lhs);
7361 return stmt;
7362 }