gcc/tree-vect-patterns.cc
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2022 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "gimple-fold.h"
39 #include "gimplify-me.h"
40 #include "cfgloop.h"
41 #include "tree-vectorizer.h"
42 #include "dumpfile.h"
43 #include "builtins.h"
44 #include "internal-fn.h"
45 #include "case-cfn-macros.h"
46 #include "fold-const-call.h"
47 #include "attribs.h"
48 #include "cgraph.h"
49 #include "omp-simd-clone.h"
50 #include "predict.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "gimple-range.h"
54
55
56 /* TODO: Note the vectorizer still builds COND_EXPRs with GENERIC compares
57 in the first operand. Disentangling this is future work; the
58 IL is properly transferred to VEC_COND_EXPRs with separate compares. */
59
60
61 /* Return true if we have a useful VR_RANGE range for VAR, storing it
62 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
63
64 static bool
65 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
66 {
67 value_range vr;
68 get_range_query (cfun)->range_of_expr (vr, var);
69 if (vr.undefined_p ())
70 vr.set_varying (TREE_TYPE (var));
71 *min_value = wi::to_wide (vr.min ());
72 *max_value = wi::to_wide (vr.max ());
73 value_range_kind vr_type = vr.kind ();
74 wide_int nonzero = get_nonzero_bits (var);
75 signop sgn = TYPE_SIGN (TREE_TYPE (var));
76 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
77 nonzero, sgn) == VR_RANGE)
78 {
79 if (dump_enabled_p ())
80 {
81 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
82 dump_printf (MSG_NOTE, " has range [");
83 dump_hex (MSG_NOTE, *min_value);
84 dump_printf (MSG_NOTE, ", ");
85 dump_hex (MSG_NOTE, *max_value);
86 dump_printf (MSG_NOTE, "]\n");
87 }
88 return true;
89 }
90 else
91 {
92 if (dump_enabled_p ())
93 {
94 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
95 dump_printf (MSG_NOTE, " has no range info\n");
96 }
97 return false;
98 }
99 }
100
101 /* Report that we've found an instance of the pattern NAME in
102 statement STMT. */
103
104 static void
105 vect_pattern_detected (const char *name, gimple *stmt)
106 {
107 if (dump_enabled_p ())
108 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
109 }
110
111 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
112 return the pattern statement's stmt_vec_info. Set its vector type to
113 VECTYPE if it doesn't have one already. */
114
115 static stmt_vec_info
116 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
117 stmt_vec_info orig_stmt_info, tree vectype)
118 {
119 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
120 if (pattern_stmt_info == NULL)
121 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
122 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
123
124 pattern_stmt_info->pattern_stmt_p = true;
125 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
126 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
127 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
128 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
129 {
130 gcc_assert (!vectype
131 || (VECTOR_BOOLEAN_TYPE_P (vectype)
132 == vect_use_mask_type_p (orig_stmt_info)));
133 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
134 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
135 }
136 return pattern_stmt_info;
137 }
138
139 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
140 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
141 have one already. */
142
143 static void
144 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
145 stmt_vec_info orig_stmt_info, tree vectype)
146 {
147 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
148 STMT_VINFO_RELATED_STMT (orig_stmt_info)
149 = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
150 }
151
152 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
153 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
154 be different from the vector type of the final pattern statement.
155 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
156 from which it was derived. */
157
158 static inline void
159 append_pattern_def_seq (vec_info *vinfo,
160 stmt_vec_info stmt_info, gimple *new_stmt,
161 tree vectype = NULL_TREE,
162 tree scalar_type_for_mask = NULL_TREE)
163 {
164 gcc_assert (!scalar_type_for_mask
165 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
166 if (vectype)
167 {
168 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
169 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
170 if (scalar_type_for_mask)
171 new_stmt_info->mask_precision
172 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
173 }
174 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
175 new_stmt);
176 }
177
178 /* The caller wants to perform new operations on vect_external variable
179 VAR, so that the result of the operations would also be vect_external.
180 Return the edge on which the operations can be performed, if one exists.
181 Return null if the operations should instead be treated as part of
182 the pattern that needs them. */
183
184 static edge
185 vect_get_external_def_edge (vec_info *vinfo, tree var)
186 {
187 edge e = NULL;
188 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
189 {
190 e = loop_preheader_edge (loop_vinfo->loop);
191 if (!SSA_NAME_IS_DEFAULT_DEF (var))
192 {
193 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
194 if (bb == NULL
195 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
196 e = NULL;
197 }
198 }
199 return e;
200 }
201
202 /* Return true if the target supports a vector version of CODE,
203 where CODE is known to map to a direct optab with the given SUBTYPE.
204 ITYPE specifies the type of (some of) the scalar inputs and OTYPE
205 specifies the type of the scalar result.
206
207 If CODE allows the inputs and outputs to have different type
208 (such as for WIDEN_SUM_EXPR), it is the input mode rather
209 than the output mode that determines the appropriate target pattern.
210 Operand 0 of the target pattern then specifies the mode that the output
211 must have.
212
213 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
214 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
215 is nonnull. */
216
217 static bool
218 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
219 tree itype, tree *vecotype_out,
220 tree *vecitype_out = NULL,
221 enum optab_subtype subtype = optab_default)
222 {
223 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
224 if (!vecitype)
225 return false;
226
227 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
228 if (!vecotype)
229 return false;
230
231 optab optab = optab_for_tree_code (code, vecitype, subtype);
232 if (!optab)
233 return false;
234
235 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
236 if (icode == CODE_FOR_nothing
237 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
238 return false;
239
240 *vecotype_out = vecotype;
241 if (vecitype_out)
242 *vecitype_out = vecitype;
243 return true;
244 }
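/* A minimal usage sketch (illustrative; it mirrors the call made by
   vect_recog_sad_pattern later in this file):

     tree half_vectype;
     if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR,
					   half_type, type_out, &half_vectype))
       return NULL;

   On success, *TYPE_OUT holds the vector form of SUM_TYPE, HALF_VECTYPE
   the vector form of HALF_TYPE, and the target has a direct optab entry
   for SAD_EXPR whose input mode is that of HALF_VECTYPE.  */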
245
246 /* Round bit precision PRECISION up to a full element. */
247
248 static unsigned int
249 vect_element_precision (unsigned int precision)
250 {
251 precision = 1 << ceil_log2 (precision);
252 return MAX (precision, BITS_PER_UNIT);
253 }
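/* For example (illustrative, assuming BITS_PER_UNIT is 8): a precision of
   12 rounds up to 1 << ceil_log2 (12) == 16, while a precision of 3 rounds
   up to 4 and is then clamped to BITS_PER_UNIT, giving 8.  */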
254
255 /* If OP is defined by a statement that's being considered for vectorization,
256 return information about that statement, otherwise return NULL. */
257
258 static stmt_vec_info
259 vect_get_internal_def (vec_info *vinfo, tree op)
260 {
261 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
262 if (def_stmt_info
263 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
264 return def_stmt_info;
265 return NULL;
266 }
267
268 /* Check whether NAME, an ssa-name used in STMT_VINFO,
269 is a result of a type promotion, such that:
270 DEF_STMT: NAME = NOP (name0)
271 If CHECK_SIGN is TRUE, check that either both types are signed or both are
272 unsigned. */
273
274 static bool
275 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
276 tree *orig_type, gimple **def_stmt, bool *promotion)
277 {
278 tree type = TREE_TYPE (name);
279 tree oprnd0;
280 enum vect_def_type dt;
281
282 stmt_vec_info def_stmt_info;
283 if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
284 return false;
285
286 if (dt != vect_internal_def
287 && dt != vect_external_def && dt != vect_constant_def)
288 return false;
289
290 if (!*def_stmt)
291 return false;
292
293 if (!is_gimple_assign (*def_stmt))
294 return false;
295
296 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
297 return false;
298
299 oprnd0 = gimple_assign_rhs1 (*def_stmt);
300
301 *orig_type = TREE_TYPE (oprnd0);
302 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
303 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
304 return false;
305
306 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
307 *promotion = true;
308 else
309 *promotion = false;
310
311 if (!vect_is_simple_use (oprnd0, vinfo, &dt))
312 return false;
313
314 return true;
315 }
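/* An illustrative example (assumed scalar code, with int at least twice
   as wide as short):

     short s;
     int name = (int) s;

   Here type_conversion_p on NAME finds the conversion as DEF_STMT, sets
   *ORIG_TYPE to short, and sets *PROMOTION to true because int has at
   least twice the precision of short.  */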
316
317 /* Holds information about an input operand after some sign changes
318 and type promotions have been peeled away. */
319 class vect_unpromoted_value {
320 public:
321 vect_unpromoted_value ();
322
323 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
324
325 /* The value obtained after peeling away zero or more casts. */
326 tree op;
327
328 /* The type of OP. */
329 tree type;
330
331 /* The definition type of OP. */
332 vect_def_type dt;
333
334 /* If OP is the result of peeling at least one cast, and if the cast
335 of OP itself is a vectorizable statement, CASTER identifies that
336 statement, otherwise it is null. */
337 stmt_vec_info caster;
338 };
339
340 inline vect_unpromoted_value::vect_unpromoted_value ()
341 : op (NULL_TREE),
342 type (NULL_TREE),
343 dt (vect_uninitialized_def),
344 caster (NULL)
345 {
346 }
347
348 /* Set the operand to OP_IN, its definition type to DT_IN, and the
349 statement that casts it to CASTER_IN. */
350
351 inline void
352 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
353 stmt_vec_info caster_in)
354 {
355 op = op_in;
356 type = TREE_TYPE (op);
357 dt = dt_in;
358 caster = caster_in;
359 }
360
361 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
362 to reach some vectorizable inner operand OP', continuing as long as it
363 is possible to convert OP' back to OP using a possible sign change
364 followed by a possible promotion P. Return this OP', or null if OP is
365 not a vectorizable SSA name. If there is a promotion P, describe its
366 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
367 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
368 have more than one user.
369
370 A successful return means that it is possible to go from OP' to OP
371 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
372 whereas the cast from UNPROM to OP might be a promotion, a sign
373 change, or a nop.
374
375 E.g. say we have:
376
377 signed short *ptr = ...;
378 signed short C = *ptr;
379 unsigned short B = (unsigned short) C; // sign change
380 signed int A = (signed int) B; // unsigned promotion
381 ...possible other uses of A...
382 unsigned int OP = (unsigned int) A; // sign change
383
384 In this case it's possible to go directly from C to OP using:
385
386 OP = (unsigned int) (unsigned short) C;
387 +------------+ +--------------+
388 promotion sign change
389
390 so OP' would be C. The input to the promotion is B, so UNPROM
391 would describe B. */
392
393 static tree
394 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
395 vect_unpromoted_value *unprom,
396 bool *single_use_p = NULL)
397 {
398 tree res = NULL_TREE;
399 tree op_type = TREE_TYPE (op);
400 unsigned int orig_precision = TYPE_PRECISION (op_type);
401 unsigned int min_precision = orig_precision;
402 stmt_vec_info caster = NULL;
403 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
404 {
405 /* See whether OP is simple enough to vectorize. */
406 stmt_vec_info def_stmt_info;
407 gimple *def_stmt;
408 vect_def_type dt;
409 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
410 break;
411
412 /* If OP is the input of a demotion, skip over it to see whether
413 OP is itself the result of a promotion. If so, the combined
414 effect of the promotion and the demotion might fit the required
415 pattern, otherwise neither operation fits.
416
417 This copes with cases such as the result of an arithmetic
418 operation being truncated before being stored, and where that
419 arithmetic operation has been recognized as an over-widened one. */
420 if (TYPE_PRECISION (op_type) <= min_precision)
421 {
422 /* Use OP as the UNPROM described above if we haven't yet
423 found a promotion, or if using the new input preserves the
424 sign of the previous promotion. */
425 if (!res
426 || TYPE_PRECISION (unprom->type) == orig_precision
427 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
428 {
429 unprom->set_op (op, dt, caster);
430 min_precision = TYPE_PRECISION (op_type);
431 }
432 /* Stop if we've already seen a promotion and if this
433 conversion does more than change the sign. */
434 else if (TYPE_PRECISION (op_type)
435 != TYPE_PRECISION (unprom->type))
436 break;
437
438 /* The sequence now extends to OP. */
439 res = op;
440 }
441
442 /* See whether OP is defined by a cast. Record it as CASTER if
443 the cast is potentially vectorizable. */
444 if (!def_stmt)
445 break;
446 caster = def_stmt_info;
447
448 /* Ignore pattern statements, since we don't link uses for them. */
449 if (caster
450 && single_use_p
451 && !STMT_VINFO_RELATED_STMT (caster)
452 && !has_single_use (res))
453 *single_use_p = false;
454
455 gassign *assign = dyn_cast <gassign *> (def_stmt);
456 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
457 break;
458
459 /* Continue with the input to the cast. */
460 op = gimple_assign_rhs1 (def_stmt);
461 op_type = TREE_TYPE (op);
462 }
463 return res;
464 }
465
466 /* OP is an integer operand to an operation that returns TYPE, and we
467 want to treat the operation as a widening one. So far we can treat
468 it as widening from *COMMON_TYPE.
469
470 Return true if OP is suitable for such a widening operation,
471 either widening from *COMMON_TYPE or from some supertype of it.
472 Update *COMMON_TYPE to the supertype in the latter case.
473
474 SHIFT_P is true if OP is a shift amount. */
475
476 static bool
477 vect_joust_widened_integer (tree type, bool shift_p, tree op,
478 tree *common_type)
479 {
480 /* Calculate the minimum precision required by OP, without changing
481 the sign of either operand. */
482 unsigned int precision;
483 if (shift_p)
484 {
485 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
486 return false;
487 precision = TREE_INT_CST_LOW (op);
488 }
489 else
490 {
491 precision = wi::min_precision (wi::to_widest (op),
492 TYPE_SIGN (*common_type));
493 if (precision * 2 > TYPE_PRECISION (type))
494 return false;
495 }
496
497 /* If OP requires a wider type, switch to that type. The checks
498 above ensure that this is still narrower than the result. */
499 precision = vect_element_precision (precision);
500 if (TYPE_PRECISION (*common_type) < precision)
501 *common_type = build_nonstandard_integer_type
502 (precision, TYPE_UNSIGNED (*common_type));
503 return true;
504 }
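/* A worked example (illustrative): with TYPE = a 32-bit int,
   *COMMON_TYPE = unsigned char and a non-shift constant OP = 300,
   wi::min_precision gives 9 bits; 9 * 2 <= 32, so the result stays wider
   than the operand, and rounding 9 up to a full element widens
   *COMMON_TYPE to a 16-bit unsigned type.  For a shift amount OP = 20 the
   function returns false, since 20 > TYPE_PRECISION (int) / 2.  */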
505
506 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
507 is narrower than TYPE, storing the supertype in *COMMON_TYPE if so. */
508
509 static bool
510 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
511 {
512 if (types_compatible_p (*common_type, new_type))
513 return true;
514
515 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
516 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
517 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
518 return true;
519
520 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
521 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
522 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
523 {
524 *common_type = new_type;
525 return true;
526 }
527
528 /* We have mismatched signs, with the signed type being
529 no wider than the unsigned type. In this case we need
530 a wider signed type. */
531 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
532 TYPE_PRECISION (new_type));
533 precision *= 2;
534
535 if (precision * 2 > TYPE_PRECISION (type))
536 return false;
537
538 *common_type = build_nonstandard_integer_type (precision, false);
539 return true;
540 }
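/* A worked example (illustrative): with *COMMON_TYPE = unsigned char,
   NEW_TYPE = signed char and TYPE = a 32-bit int, neither input type can
   hold all values of the other, so the mismatched-sign case applies:
   precision = 2 * MAX (8, 8) = 16, and since 16 * 2 <= 32 the function
   stores a signed 16-bit type in *COMMON_TYPE and returns true.  */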
541
542 /* Check whether STMT_INFO can be viewed as a tree of integer operations
543 in which each node either performs CODE or WIDENED_CODE, and where
544 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
545 specifies the maximum number of leaf operands. SHIFT_P says whether
546 CODE and WIDENED_CODE are some sort of shift.
547
548 If STMT_INFO is such a tree, return the number of leaf operands
549 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
550 to a type that (a) is narrower than the result of STMT_INFO and
551 (b) can hold all leaf operand values.
552
553 If SUBTYPE is nonnull, allow the operands to differ in sign
554 but not in precision. SUBTYPE is updated to reflect
555 this.
556
557 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
558 exists. */
559
560 static unsigned int
561 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
562 tree_code widened_code, bool shift_p,
563 unsigned int max_nops,
564 vect_unpromoted_value *unprom, tree *common_type,
565 enum optab_subtype *subtype = NULL)
566 {
567 /* Check for an integer operation with the right code. */
568 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
569 if (!assign)
570 return 0;
571
572 tree_code rhs_code = gimple_assign_rhs_code (assign);
573 if (rhs_code != code && rhs_code != widened_code)
574 return 0;
575
576 tree type = TREE_TYPE (gimple_assign_lhs (assign));
577 if (!INTEGRAL_TYPE_P (type))
578 return 0;
579
580 /* Assume that both operands will be leaf operands. */
581 max_nops -= 2;
582
583 /* Check the operands. */
584 unsigned int next_op = 0;
585 for (unsigned int i = 0; i < 2; ++i)
586 {
587 vect_unpromoted_value *this_unprom = &unprom[next_op];
588 unsigned int nops = 1;
589 tree op = gimple_op (assign, i + 1);
590 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
591 {
592 /* We already have a common type from earlier operands.
593 Update it to account for OP. */
594 this_unprom->set_op (op, vect_constant_def);
595 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
596 return 0;
597 }
598 else
599 {
600 /* Only allow shifts by constants. */
601 if (shift_p && i == 1)
602 return 0;
603
604 if (!vect_look_through_possible_promotion (vinfo, op, this_unprom))
605 return 0;
606
607 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
608 {
609 /* The operand isn't widened. If STMT_INFO has the code
610 for an unwidened operation, recursively check whether
611 this operand is a node of the tree. */
612 if (rhs_code != code
613 || max_nops == 0
614 || this_unprom->dt != vect_internal_def)
615 return 0;
616
617 /* Give back the leaf slot allocated above now that we're
618 not treating this as a leaf operand. */
619 max_nops += 1;
620
621 /* Recursively process the definition of the operand. */
622 stmt_vec_info def_stmt_info
623 = vinfo->lookup_def (this_unprom->op);
624 nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
625 widened_code, shift_p, max_nops,
626 this_unprom, common_type,
627 subtype);
628 if (nops == 0)
629 return 0;
630
631 max_nops -= nops;
632 }
633 else
634 {
635 /* Make sure that the operand is narrower than the result. */
636 if (TYPE_PRECISION (this_unprom->type) * 2
637 > TYPE_PRECISION (type))
638 return 0;
639
640 /* Update COMMON_TYPE for the new operand. */
641 if (i == 0)
642 *common_type = this_unprom->type;
643 else if (!vect_joust_widened_type (type, this_unprom->type,
644 common_type))
645 {
646 if (subtype)
647 {
648 /* See if we can sign extend the smaller type. */
649 if (TYPE_PRECISION (this_unprom->type)
650 > TYPE_PRECISION (*common_type))
651 *common_type = this_unprom->type;
652 *subtype = optab_vector_mixed_sign;
653 }
654 else
655 return 0;
656 }
657 }
658 }
659 next_op += nops;
660 }
661 return next_op;
662 }
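/* An illustrative example (assumed scalar code, mirroring the use in
   vect_recog_dot_prod_pattern below):

     signed char x, y;
     int prod = (int) x * (int) y;

   Calling this function on the multiplication with CODE = MULT_EXPR,
   WIDENED_CODE = WIDEN_MULT_EXPR and MAX_NOPS = 2 returns 2, describes
   x and y in UNPROM[0] and UNPROM[1], and sets *COMMON_TYPE to
   signed char.  */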
663
664 /* Helper to return a new pattern temporary of TYPE for STMT. If STMT
665 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
666
667 static tree
668 vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
669 {
670 return make_temp_ssa_name (type, stmt, "patt");
671 }
672
673 /* STMT2_INFO describes a type conversion that could be split into STMT1
674 followed by a version of STMT2_INFO that takes NEW_RHS as its first
675 input. Try to do this using pattern statements, returning true on
676 success. */
677
678 static bool
679 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
680 gimple *stmt1, tree vectype)
681 {
682 if (is_pattern_stmt_p (stmt2_info))
683 {
684 /* STMT2_INFO is part of a pattern. Get the statement to which
685 the pattern is attached. */
686 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
687 vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
688
689 if (dump_enabled_p ())
690 dump_printf_loc (MSG_NOTE, vect_location,
691 "Splitting pattern statement: %G", stmt2_info->stmt);
692
693 /* Since STMT2_INFO is a pattern statement, we can change it
694 in-situ without worrying about changing the code for the
695 containing block. */
696 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
697
698 if (dump_enabled_p ())
699 {
700 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
701 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
702 stmt2_info->stmt);
703 }
704
705 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
706 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
707 /* STMT2_INFO is the actual pattern statement. Add STMT1
708 to the end of the definition sequence. */
709 gimple_seq_add_stmt_without_update (def_seq, stmt1);
710 else
711 {
712 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
713 before it. */
714 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
715 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
716 }
717 return true;
718 }
719 else
720 {
721 /* STMT2_INFO doesn't yet have a pattern. Try to create a
722 two-statement pattern now. */
723 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
724 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
725 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
726 if (!lhs_vectype)
727 return false;
728
729 if (dump_enabled_p ())
730 dump_printf_loc (MSG_NOTE, vect_location,
731 "Splitting statement: %G", stmt2_info->stmt);
732
733 /* Add STMT1 as a singleton pattern definition sequence. */
734 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
735 vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
736 gimple_seq_add_stmt_without_update (def_seq, stmt1);
737
738 /* Build the second of the two pattern statements. */
739 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
740 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
741 vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
742
743 if (dump_enabled_p ())
744 {
745 dump_printf_loc (MSG_NOTE, vect_location,
746 "into pattern statements: %G", stmt1);
747 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
748 (gimple *) new_stmt2);
749 }
750
751 return true;
752 }
753 }
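/* An illustrative sketch (assumed types and SSA names): if STMT2_INFO is
   the conversion

     a = (int) c;   // c has type signed char

   then a caller wanting a 16-bit intermediate can build STMT1 as
   "patt_x = (short) c" and pass NEW_RHS = patt_x, so that the pair
   behaves like

     patt_x = (short) c;
     a = (int) patt_x;

   vect_convert_input below uses this to tap a mid-way point of an
   existing cast.  */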
754
755 /* Convert UNPROM to TYPE and return the result, adding new statements
756 to STMT_INFO's pattern definition statements if no better way is
757 available. VECTYPE is the vector form of TYPE.
758
759 If SUBTYPE then convert the type based on the subtype. */
760
761 static tree
762 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
763 vect_unpromoted_value *unprom, tree vectype,
764 enum optab_subtype subtype = optab_default)
765 {
766 /* Update the type if the signs differ. */
767 if (subtype == optab_vector_mixed_sign)
768 {
769 gcc_assert (!TYPE_UNSIGNED (type));
770 if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
771 {
772 type = unsigned_type_for (type);
773 vectype = unsigned_type_for (vectype);
774 }
775 }
776
777 /* Check for a no-op conversion. */
778 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
779 return unprom->op;
780
781 /* Allow the caller to create constant vect_unpromoted_values. */
782 if (TREE_CODE (unprom->op) == INTEGER_CST)
783 return wide_int_to_tree (type, wi::to_widest (unprom->op));
784
785 tree input = unprom->op;
786 if (unprom->caster)
787 {
788 tree lhs = gimple_get_lhs (unprom->caster->stmt);
789 tree lhs_type = TREE_TYPE (lhs);
790
791 /* If the result of the existing cast is the right width, use it
792 instead of the source of the cast. */
793 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
794 input = lhs;
795 /* If the precision we want is between the source and result
796 precisions of the existing cast, try splitting the cast into
797 two and tapping into a mid-way point. */
798 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
799 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
800 {
801 /* In order to preserve the semantics of the original cast,
802 give the mid-way point the same signedness as the input value.
803
804 It would be possible to use a signed type here instead if
805 TYPE is signed and UNPROM->TYPE is unsigned, but that would
806 make the sign of the midtype sensitive to the order in
807 which we process the statements, since the signedness of
808 TYPE is the signedness required by just one of possibly
809 many users. Also, unsigned promotions are usually as cheap
810 as or cheaper than signed ones, so it's better to keep an
811 unsigned promotion. */
812 tree midtype = build_nonstandard_integer_type
813 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
814 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
815 if (vec_midtype)
816 {
817 input = vect_recog_temp_ssa_var (midtype, NULL);
818 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
819 unprom->op);
820 if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
821 vec_midtype))
822 append_pattern_def_seq (vinfo, stmt_info,
823 new_stmt, vec_midtype);
824 }
825 }
826
827 /* See if we can reuse an existing result. */
828 if (types_compatible_p (type, TREE_TYPE (input)))
829 return input;
830 }
831
832 /* We need a new conversion statement. */
833 tree new_op = vect_recog_temp_ssa_var (type, NULL);
834 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
835
836 /* If OP is an external value, see if we can insert the new statement
837 on an incoming edge. */
838 if (input == unprom->op && unprom->dt == vect_external_def)
839 if (edge e = vect_get_external_def_edge (vinfo, input))
840 {
841 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
842 gcc_assert (!new_bb);
843 return new_op;
844 }
845
846 /* As a (common) last resort, add the statement to the pattern itself. */
847 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
848 return new_op;
849 }
850
851 /* Invoke vect_convert_input for N elements of UNPROM and store the
852 result in the corresponding elements of RESULT.
853
854 If SUBTYPE then convert the type based on the subtype. */
855
856 static void
857 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
858 tree *result, tree type, vect_unpromoted_value *unprom,
859 tree vectype, enum optab_subtype subtype = optab_default)
860 {
861 for (unsigned int i = 0; i < n; ++i)
862 {
863 unsigned int j;
864 for (j = 0; j < i; ++j)
865 if (unprom[j].op == unprom[i].op)
866 break;
867
868 if (j < i)
869 result[i] = result[j];
870 else
871 result[i] = vect_convert_input (vinfo, stmt_info,
872 type, &unprom[i], vectype, subtype);
873 }
874 }
875
876 /* The caller has created a (possibly empty) sequence of pattern definition
877 statements followed by a single statement PATTERN_STMT. Cast the result
878 of this final statement to TYPE. If a new statement is needed, add
879 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
880 and return the new statement, otherwise return PATTERN_STMT as-is.
881 VECITYPE is the vector form of PATTERN_STMT's result type. */
882
883 static gimple *
884 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
885 gimple *pattern_stmt, tree vecitype)
886 {
887 tree lhs = gimple_get_lhs (pattern_stmt);
888 if (!types_compatible_p (type, TREE_TYPE (lhs)))
889 {
890 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
891 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
892 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
893 }
894 return pattern_stmt;
895 }
896
897 /* Return true if STMT_VINFO describes a reduction for which reassociation
898 is allowed. If STMT_INFO is part of a group, assume that it's part of
899 a reduction chain and optimistically assume that all statements
900 except the last allow reassociation.
901 Also require it to have code CODE and to be a reduction
902 in the outermost loop. When returning true, store the operands in
903 *OP0_OUT and *OP1_OUT. */
904
905 static bool
906 vect_reassociating_reduction_p (vec_info *vinfo,
907 stmt_vec_info stmt_info, tree_code code,
908 tree *op0_out, tree *op1_out)
909 {
910 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
911 if (!loop_info)
912 return false;
913
914 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
915 if (!assign || gimple_assign_rhs_code (assign) != code)
916 return false;
917
918 /* We don't allow changing the order of the computation in the inner-loop
919 when doing outer-loop vectorization. */
920 class loop *loop = LOOP_VINFO_LOOP (loop_info);
921 if (loop && nested_in_vect_loop_p (loop, stmt_info))
922 return false;
923
924 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
925 {
926 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
927 code))
928 return false;
929 }
930 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
931 return false;
932
933 *op0_out = gimple_assign_rhs1 (assign);
934 *op1_out = gimple_assign_rhs2 (assign);
935 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
936 std::swap (*op0_out, *op1_out);
937 return true;
938 }
939
940 /* match.pd function to match
941 (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
942 with conditions:
943 1) @1, @2, c, d, a, b are all of integral type.
944 2) There's a single use of both @1 and @2.
945 3) a and c have the same precision.
946 4) c and @1 have different precisions.
947 5) c and d have the same type, or they may differ in sign when the
948 convert is a truncation.
949
950 Record a, c, d and @3. */
951
952 extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
953
954 /* Function vect_recog_cond_expr_convert
955
956 Try to find the following pattern:
957
958 TYPE_AB A,B;
959 TYPE_CD C,D;
960 TYPE_E E;
961 TYPE_E op_true = (TYPE_E) A;
962 TYPE_E op_false = (TYPE_E) B;
963
964 E = C cmp D ? op_true : op_false;
965
966 where
967 TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
968 TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
969 single_use of op_true and op_false.
970 TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
971
972 Input:
973
974 * STMT_VINFO: The stmt from which the pattern search begins.
975 here it starts with E = C cmp D ? op_true : op_false;
976
977 Output:
978
979 TYPE1 E' = C cmp D ? A : B;
980 TYPE3 E = (TYPE3) E';
981
982 There may be an extra nop_convert for A or B to handle different signedness.
983
984 * TYPE_OUT: The vector type of the output of this pattern.
985
986 * Return value: A new stmt that will be used to replace the sequence of
987 stmts that constitute the pattern. In this case it will be:
988 E = (TYPE3)E';
989 E' = C cmp D ? A : B; is recorded in pattern definition statements; */
990
991 static gimple *
992 vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
993 stmt_vec_info stmt_vinfo, tree *type_out)
994 {
995 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
996 tree lhs, match[4], temp, type, new_lhs, op2;
997 gimple *cond_stmt;
998 gimple *pattern_stmt;
999
1000 if (!last_stmt)
1001 return NULL;
1002
1003 lhs = gimple_assign_lhs (last_stmt);
1004
1005 /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B;
1006 TYPE_PRECISION (A) == TYPE_PRECISION (C). */
1007 if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
1008 return NULL;
1009
1010 vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
1011
1012 op2 = match[2];
1013 type = TREE_TYPE (match[1]);
1014 if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
1015 {
1016 op2 = vect_recog_temp_ssa_var (type, NULL);
1017 gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
1018 append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
1019 get_vectype_for_scalar_type (vinfo, type));
1020 }
1021
1022 temp = vect_recog_temp_ssa_var (type, NULL);
1023 cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
1024 match[1], op2));
1025 append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
1026 get_vectype_for_scalar_type (vinfo, type));
1027 new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
1028 pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
1029 *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
1030
1031 if (dump_enabled_p ())
1032 dump_printf_loc (MSG_NOTE, vect_location,
1033 "created pattern stmt: %G", pattern_stmt);
1034 return pattern_stmt;
1035 }
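/* An illustrative input (assumed scalar code, on a target where long is
   wider than int) that this recognizer is intended to match:

     int a, b, c, d;
     long e = c < d ? (long) a : (long) b;

   The COND_EXPR is then computed in int and followed by a single
   conversion of the selected value to long.  */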
1036
1037 /* Function vect_recog_dot_prod_pattern
1038
1039 Try to find the following pattern:
1040
1041 type1a x_t;
1042 type1b y_t;
1043 TYPE1 prod;
1044 TYPE2 sum = init;
1045 loop:
1046 sum_0 = phi <init, sum_1>
1047 S1 x_t = ...
1048 S2 y_t = ...
1049 S3 x_T = (TYPE1) x_t;
1050 S4 y_T = (TYPE1) y_t;
1051 S5 prod = x_T * y_T;
1052 [S6 prod = (TYPE2) prod; #optional]
1053 S7 sum_1 = prod + sum_0;
1054
1055 where 'TYPE1' is exactly double the size of the types 'type1a' and 'type1b',
1056 the sign of 'TYPE1' must match the sign of either 'type1a' or 'type1b',
1057 and the signs of 'type1a' and 'type1b' themselves can differ.
1058
1059 Input:
1060
1061 * STMT_VINFO: The stmt from which the pattern search begins. In the
1062 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1063 will be detected.
1064
1065 Output:
1066
1067 * TYPE_OUT: The type of the output of this pattern.
1068
1069 * Return value: A new stmt that will be used to replace the sequence of
1070 stmts that constitute the pattern. In this case it will be:
1071 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1072
1073 Note: The dot-prod idiom is a widening reduction pattern that is
1074 vectorized without preserving all the intermediate results. It
1075 produces only N/2 (widened) results (by summing up pairs of
1076 intermediate results) rather than all N results. Therefore, we
1077 cannot allow this pattern when we want to get all the results and in
1078 the correct order (as is the case when this computation is in an
1079 inner-loop nested in an outer-loop that is being vectorized). */
1080
1081 static gimple *
1082 vect_recog_dot_prod_pattern (vec_info *vinfo,
1083 stmt_vec_info stmt_vinfo, tree *type_out)
1084 {
1085 tree oprnd0, oprnd1;
1086 gimple *last_stmt = stmt_vinfo->stmt;
1087 tree type, half_type;
1088 gimple *pattern_stmt;
1089 tree var;
1090
1091 /* Look for the following pattern
1092 DX = (TYPE1) X;
1093 DY = (TYPE1) Y;
1094 DPROD = DX * DY;
1095 DDPROD = (TYPE2) DPROD;
1096 sum_1 = DDPROD + sum_0;
1097 In which
1098 - DX is double the size of X
1099 - DY is double the size of Y
1100 - DX, DY, DPROD all have the same type but the sign
1101 between X, Y and DPROD can differ.
1102 - sum is the same size as DPROD or bigger
1103 - sum has been recognized as a reduction variable.
1104
1105 This is equivalent to:
1106 DPROD = X w* Y; #widen mult
1107 sum_1 = DPROD w+ sum_0; #widen summation
1108 or
1109 DPROD = X w* Y; #widen mult
1110 sum_1 = DPROD + sum_0; #summation
1111 */
1112
1113 /* Starting from LAST_STMT, follow the defs of its uses in search
1114 of the above pattern. */
1115
1116 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1117 &oprnd0, &oprnd1))
1118 return NULL;
1119
1120 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1121
1122 vect_unpromoted_value unprom_mult;
1123 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
1124
1125 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1126 we know that oprnd1 is the reduction variable (defined by a loop-header
1127 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1128 It is left to check that oprnd0 is defined by a (widen_)mult_expr. */
1129 if (!oprnd0)
1130 return NULL;
1131
1132 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
1133 if (!mult_vinfo)
1134 return NULL;
1135
1136 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1137 inside the loop (in case we are analyzing an outer-loop). */
1138 vect_unpromoted_value unprom0[2];
1139 enum optab_subtype subtype = optab_vector;
1140 if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
1141 false, 2, unprom0, &half_type, &subtype))
1142 return NULL;
1143
1144 /* If there are two widening operations, make sure they agree on the sign
1145 of the extension. The result of an optab_vector_mixed_sign operation
1146 is signed; otherwise, the result has the same sign as the operands. */
1147 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1148 && (subtype == optab_vector_mixed_sign
1149 ? TYPE_UNSIGNED (unprom_mult.type)
1150 : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1151 return NULL;
1152
1153 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1154
1155 /* If the inputs have mixed signs, canonicalize on using the signed
1156 input type for analysis. This also helps when emulating mixed-sign
1157 operations using signed operations. */
1158 if (subtype == optab_vector_mixed_sign)
1159 half_type = signed_type_for (half_type);
1160
1161 tree half_vectype;
1162 if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1163 type_out, &half_vectype, subtype))
1164 {
1165 /* We can emulate a mixed-sign dot-product using a sequence of
1166 signed dot-products; see vect_emulate_mixed_dot_prod for details. */
1167 if (subtype != optab_vector_mixed_sign
1168 || !vect_supportable_direct_optab_p (vinfo, signed_type_for (type),
1169 DOT_PROD_EXPR, half_type,
1170 type_out, &half_vectype,
1171 optab_vector))
1172 return NULL;
1173
1174 *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
1175 *type_out);
1176 }
1177
1178 /* Get the inputs in the appropriate types. */
1179 tree mult_oprnd[2];
1180 vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1181 unprom0, half_vectype, subtype);
1182
1183 var = vect_recog_temp_ssa_var (type, NULL);
1184 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1185 mult_oprnd[0], mult_oprnd[1], oprnd1);
1186
1187 return pattern_stmt;
1188 }
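/* A minimal scalar loop of the kind this recognizer targets (an
   illustrative sketch; the array size is an assumption):

     short x[256], y[256];
     int sum = 0;
     for (int i = 0; i < 256; ++i)
       sum += x[i] * y[i];

   Each product is computed in int after promoting x[i] and y[i], and the
   reduction becomes DOT_PROD_EXPR <x[i], y[i], sum_0> when the target has
   a suitable dot-product optab.  */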
1189
1190
1191 /* Function vect_recog_sad_pattern
1192
1193 Try to find the following Sum of Absolute Difference (SAD) pattern:
1194
1195 type x_t, y_t;
1196 signed TYPE1 diff, abs_diff;
1197 TYPE2 sum = init;
1198 loop:
1199 sum_0 = phi <init, sum_1>
1200 S1 x_t = ...
1201 S2 y_t = ...
1202 S3 x_T = (TYPE1) x_t;
1203 S4 y_T = (TYPE1) y_t;
1204 S5 diff = x_T - y_T;
1205 S6 abs_diff = ABS_EXPR <diff>;
1206 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1207 S8 sum_1 = abs_diff + sum_0;
1208
1209 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1210 same size of 'TYPE1' or bigger. This is a special case of a reduction
1211 computation.
1212
1213 Input:
1214
1215 * STMT_VINFO: The stmt from which the pattern search begins. In the
1216 example, when this function is called with S8, the pattern
1217 {S3,S4,S5,S6,S7,S8} will be detected.
1218
1219 Output:
1220
1221 * TYPE_OUT: The type of the output of this pattern.
1222
1223 * Return value: A new stmt that will be used to replace the sequence of
1224 stmts that constitute the pattern. In this case it will be:
1225 SAD_EXPR <x_t, y_t, sum_0>
1226 */
1227
1228 static gimple *
1229 vect_recog_sad_pattern (vec_info *vinfo,
1230 stmt_vec_info stmt_vinfo, tree *type_out)
1231 {
1232 gimple *last_stmt = stmt_vinfo->stmt;
1233 tree half_type;
1234
1235 /* Look for the following pattern
1236 DX = (TYPE1) X;
1237 DY = (TYPE1) Y;
1238 DDIFF = DX - DY;
1239 DAD = ABS_EXPR <DDIFF>;
1240 DAD = (TYPE2) DAD; #optional
1241 sum_1 = DAD + sum_0;
1242 In which
1243 - DX is at least double the size of X
1244 - DY is at least double the size of Y
1245 - DX, DY, DDIFF, DAD all have the same type
1246 - sum is the same size as DAD or bigger
1247 - sum has been recognized as a reduction variable.
1248
1249 This is equivalent to:
1250 DDIFF = X w- Y; #widen sub
1251 DAD = ABS_EXPR <DDIFF>;
1252 sum_1 = DAD w+ sum_0; #widen summation
1253 or
1254 DDIFF = X w- Y; #widen sub
1255 DAD = ABS_EXPR <DDIFF>;
1256 sum_1 = DAD + sum_0; #summation
1257 */
1258
1259 /* Starting from LAST_STMT, follow the defs of its uses in search
1260 of the above pattern. */
1261
1262 tree plus_oprnd0, plus_oprnd1;
1263 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1264 &plus_oprnd0, &plus_oprnd1))
1265 return NULL;
1266
1267 tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1268
1269 /* Any non-truncating sequence of conversions is OK here, since
1270 with a successful match, the result of the ABS(U) is known to fit
1271 within the nonnegative range of the result type. (It cannot be the
1272 negative of the minimum signed value due to the range of the widening
1273 MINUS_EXPR.) */
1274 vect_unpromoted_value unprom_abs;
1275 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1276 &unprom_abs);
1277
1278 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1279 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1280 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1281 Then check that plus_oprnd0 is defined by an abs_expr. */
1282
1283 if (!plus_oprnd0)
1284 return NULL;
1285
1286 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1287 if (!abs_stmt_vinfo)
1288 return NULL;
1289
1290 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1291 inside the loop (in case we are analyzing an outer-loop). */
1292 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1293 if (!abs_stmt
1294 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1295 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1296 return NULL;
1297
1298 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1299 tree abs_type = TREE_TYPE (abs_oprnd);
1300 if (TYPE_UNSIGNED (abs_type))
1301 return NULL;
1302
1303 /* Peel off conversions from the ABS input. This can involve sign
1304 changes (e.g. from an unsigned subtraction to a signed ABS input)
1305 or signed promotion, but it can't include unsigned promotion.
1306 (Note that ABS of an unsigned promotion should have been folded
1307 away before now anyway.) */
1308 vect_unpromoted_value unprom_diff;
1309 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1310 &unprom_diff);
1311 if (!abs_oprnd)
1312 return NULL;
1313 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1314 && TYPE_UNSIGNED (unprom_diff.type))
1315 return NULL;
1316
1317 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1318 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1319 if (!diff_stmt_vinfo)
1320 return NULL;
1321
1322 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1323 inside the loop (in case we are analyzing an outer-loop). */
1324 vect_unpromoted_value unprom[2];
1325 if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
1326 false, 2, unprom, &half_type))
1327 return NULL;
1328
1329 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1330
1331 tree half_vectype;
1332 if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1333 type_out, &half_vectype))
1334 return NULL;
1335
1336 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1337 tree sad_oprnd[2];
1338 vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1339 unprom, half_vectype);
1340
1341 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1342 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1343 sad_oprnd[1], plus_oprnd1);
1344
1345 return pattern_stmt;
1346 }
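/* A minimal scalar loop of the kind this recognizer targets (an
   illustrative sketch; types and bounds are assumptions):

     unsigned char x[256], y[256];
     int sum = 0;
     for (int i = 0; i < 256; ++i)
       sum += __builtin_abs (x[i] - y[i]);

   The subtraction and absolute value are computed in int after promotion,
   and the reduction becomes SAD_EXPR <x[i], y[i], sum_0> when the target
   has a suitable SAD optab.  */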
1347
1348 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1349 so that it can be treated as though it had the form:
1350
1351 A_TYPE a;
1352 B_TYPE b;
1353 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1354 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1355 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1356 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1357 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1358
1359 Try to replace the pattern with:
1360
1361 A_TYPE a;
1362 B_TYPE b;
1363 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1364 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1365 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1366 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1367
1368 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1369
1370 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1371 name of the pattern being matched, for dump purposes. */
1372
1373 static gimple *
1374 vect_recog_widen_op_pattern (vec_info *vinfo,
1375 stmt_vec_info last_stmt_info, tree *type_out,
1376 tree_code orig_code, tree_code wide_code,
1377 bool shift_p, const char *name)
1378 {
1379 gimple *last_stmt = last_stmt_info->stmt;
1380
1381 vect_unpromoted_value unprom[2];
1382 tree half_type;
1383 if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1384 shift_p, 2, unprom, &half_type))
1385 return NULL;
1386
1387 /* Pattern detected. */
1388 vect_pattern_detected (name, last_stmt);
1389
1390 tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1391 tree itype = type;
1392 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1393 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1394 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1395 TYPE_UNSIGNED (half_type));
1396
1397 /* Check target support */
1398 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1399 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1400 tree ctype = itype;
1401 tree vecctype = vecitype;
1402 if (orig_code == MINUS_EXPR
1403 && TYPE_UNSIGNED (itype)
1404 && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1405 {
1406 /* Subtraction is special, even if half_type is unsigned and no matter
1407 whether type is signed or unsigned, if type is wider than itype,
1408 we need to sign-extend from the widening operation result to the
1409 result type.
1410 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1411 itype unsigned short and type either int or unsigned int.
1412 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1413 (unsigned short) 0xffff, but for type int we want the result -1
1414 and for type unsigned int 0xffffffff rather than 0xffff. */
1415 ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1416 vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1417 }
1418
1419 enum tree_code dummy_code;
1420 int dummy_int;
1421 auto_vec<tree> dummy_vec;
1422 if (!vectype
1423 || !vecitype
1424 || !vecctype
1425 || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1426 vecitype, vectype,
1427 &dummy_code, &dummy_code,
1428 &dummy_int, &dummy_vec))
1429 return NULL;
1430
1431 *type_out = get_vectype_for_scalar_type (vinfo, type);
1432 if (!*type_out)
1433 return NULL;
1434
1435 tree oprnd[2];
1436 vect_convert_inputs (vinfo, last_stmt_info,
1437 2, oprnd, half_type, unprom, vectype);
1438
1439 tree var = vect_recog_temp_ssa_var (itype, NULL);
1440 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1441 oprnd[0], oprnd[1]);
1442
1443 if (vecctype != vecitype)
1444 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
1445 pattern_stmt, vecitype);
1446
1447 return vect_convert_output (vinfo, last_stmt_info,
1448 type, pattern_stmt, vecctype);
1449 }
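/* An illustrative example for the MULT_EXPR case (assumed types):

     short a, b;
     int c = (int) a * (int) b;

   is rewritten as WIDEN_MULT_EXPR <a, b> producing int directly, provided
   the target supports the corresponding widening multiply.  */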
1450
1451 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1452 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1453
1454 static gimple *
1455 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1456 tree *type_out)
1457 {
1458 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1459 MULT_EXPR, WIDEN_MULT_EXPR, false,
1460 "vect_recog_widen_mult_pattern");
1461 }
1462
1463 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1464 to WIDEN_PLUS_EXPR. See vect_recog_widen_op_pattern for details. */
1465
1466 static gimple *
1467 vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1468 tree *type_out)
1469 {
1470 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1471 PLUS_EXPR, WIDEN_PLUS_EXPR, false,
1472 "vect_recog_widen_plus_pattern");
1473 }
1474
1475 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1476 to WIDEN_MINUS_EXPR. See vect_recog_widen_op_pattern for details. */
1477 static gimple *
1478 vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1479 tree *type_out)
1480 {
1481 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1482 MINUS_EXPR, WIDEN_MINUS_EXPR, false,
1483 "vect_recog_widen_minus_pattern");
1484 }
1485
1486 /* Function vect_recog_popcount_pattern
1487
1488 Try to find the following pattern:
1489
1490 UTYPE1 A;
1491 TYPE1 B;
1492 UTYPE2 temp_in;
1493 TYPE3 temp_out;
1494 temp_in = (UTYPE2)A;
1495
1496 temp_out = __builtin_popcount{,l,ll} (temp_in);
1497 B = (TYPE1) temp_out;
1498
1499 UTYPE2 may or may not be equal to TYPE3,
1500 e.g. UTYPE2 is equal to TYPE3 for __builtin_popcount
1501 but UTYPE2 is not equal to TYPE3 for __builtin_popcountll.
1502
1503 Input:
1504
1505 * STMT_VINFO: The stmt from which the pattern search begins.
1506 here it starts with B = (TYPE1) temp_out;
1507
1508 Output:
1509
1510 * TYPE_OUT: The vector type of the output of this pattern.
1511
1512 * Return value: A new stmt that will be used to replace the sequence of
1513 stmts that constitute the pattern. In this case it will be:
1514 B = .POPCOUNT (A);
1515 */
1516
1517 static gimple *
1518 vect_recog_popcount_pattern (vec_info *vinfo,
1519 stmt_vec_info stmt_vinfo, tree *type_out)
1520 {
1521 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1522 gimple *popcount_stmt, *pattern_stmt;
1523 tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
1524 auto_vec<tree> vargs;
1525
1526 /* Find B = (TYPE1) temp_out. */
1527 if (!last_stmt)
1528 return NULL;
1529 tree_code code = gimple_assign_rhs_code (last_stmt);
1530 if (!CONVERT_EXPR_CODE_P (code))
1531 return NULL;
1532
1533 lhs_oprnd = gimple_assign_lhs (last_stmt);
1534 lhs_type = TREE_TYPE (lhs_oprnd);
1535 if (!INTEGRAL_TYPE_P (lhs_type))
1536 return NULL;
1537
1538 rhs_oprnd = gimple_assign_rhs1 (last_stmt);
1539 if (TREE_CODE (rhs_oprnd) != SSA_NAME
1540 || !has_single_use (rhs_oprnd))
1541 return NULL;
1542 popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
1543
1544 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
1545 if (!is_gimple_call (popcount_stmt))
1546 return NULL;
1547 switch (gimple_call_combined_fn (popcount_stmt))
1548 {
1549 CASE_CFN_POPCOUNT:
1550 break;
1551 default:
1552 return NULL;
1553 }
1554
1555 if (gimple_call_num_args (popcount_stmt) != 1)
1556 return NULL;
1557
1558 rhs_oprnd = gimple_call_arg (popcount_stmt, 0);
1559 vect_unpromoted_value unprom_diff;
1560 rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd,
1561 &unprom_diff);
1562
1563 if (!rhs_origin)
1564 return NULL;
1565
1566 /* Input and output of .POPCOUNT should be same-precision integers.
1567 Also, A should be unsigned or have the same precision as temp_in;
1568 otherwise there would be a sign_extend from A to temp_in. */
1569 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type)
1570 || (!TYPE_UNSIGNED (unprom_diff.type)
1571 && (TYPE_PRECISION (unprom_diff.type)
1572 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))))
1573 return NULL;
1574 vargs.safe_push (unprom_diff.op);
1575
1576 vect_pattern_detected ("vect_recog_popcount_pattern", popcount_stmt);
1577 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1578 /* Do it only if the backend has popcount<vector_mode>2 pattern. */
1579 if (!vec_type
1580 || !direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
1581 OPTIMIZE_FOR_SPEED))
1582 return NULL;
1583
1584 /* Create B = .POPCOUNT (A). */
1585 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1586 pattern_stmt = gimple_build_call_internal_vec (IFN_POPCOUNT, vargs);
1587 gimple_call_set_lhs (pattern_stmt, new_var);
1588 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1589 *type_out = vec_type;
1590
1591 if (dump_enabled_p ())
1592 dump_printf_loc (MSG_NOTE, vect_location,
1593 "created pattern stmt: %G", pattern_stmt);
1594 return pattern_stmt;
1595 }
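/* An illustrative input (assumed types) that this recognizer matches:

     unsigned char a;
     unsigned char b = (unsigned char) __builtin_popcount (a);

   The implicit widening of A for the call and the narrowing of the result
   back to unsigned char are folded into a single B = .POPCOUNT (A),
   provided the target supports popcount on the corresponding vector
   mode.  */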
1596
1597 /* Function vect_recog_pow_pattern
1598
1599 Try to find the following pattern:
1600
1601 x = POW (y, N);
1602
1603 with POW being one of pow, powf, powi, powif and N being
1604 either 2 or 0.5.
1605
1606 Input:
1607
1608 * STMT_VINFO: The stmt from which the pattern search begins.
1609
1610 Output:
1611
1612 * TYPE_OUT: The type of the output of this pattern.
1613
1614 * Return value: A new stmt that will be used to replace the sequence of
1615 stmts that constitute the pattern. In this case it will be:
1616 x = x * x
1617 or
1618 x = sqrt (x)
1619 */
1620
1621 static gimple *
1622 vect_recog_pow_pattern (vec_info *vinfo,
1623 stmt_vec_info stmt_vinfo, tree *type_out)
1624 {
1625 gimple *last_stmt = stmt_vinfo->stmt;
1626 tree base, exp;
1627 gimple *stmt;
1628 tree var;
1629
1630 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1631 return NULL;
1632
1633 switch (gimple_call_combined_fn (last_stmt))
1634 {
1635 CASE_CFN_POW:
1636 CASE_CFN_POWI:
1637 break;
1638
1639 default:
1640 return NULL;
1641 }
1642
1643 base = gimple_call_arg (last_stmt, 0);
1644 exp = gimple_call_arg (last_stmt, 1);
1645 if (TREE_CODE (exp) != REAL_CST
1646 && TREE_CODE (exp) != INTEGER_CST)
1647 {
1648 if (flag_unsafe_math_optimizations
1649 && TREE_CODE (base) == REAL_CST
1650 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
1651 {
1652 combined_fn log_cfn;
1653 built_in_function exp_bfn;
1654 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1655 {
1656 case BUILT_IN_POW:
1657 log_cfn = CFN_BUILT_IN_LOG;
1658 exp_bfn = BUILT_IN_EXP;
1659 break;
1660 case BUILT_IN_POWF:
1661 log_cfn = CFN_BUILT_IN_LOGF;
1662 exp_bfn = BUILT_IN_EXPF;
1663 break;
1664 case BUILT_IN_POWL:
1665 log_cfn = CFN_BUILT_IN_LOGL;
1666 exp_bfn = BUILT_IN_EXPL;
1667 break;
1668 default:
1669 return NULL;
1670 }
1671 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1672 tree exp_decl = builtin_decl_implicit (exp_bfn);
1673 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1674 does that, but if C is a power of 2 it prefers
1675 exp2 (log2 (C) * x) in the non-vectorized version; since there is no
1676 vectorized exp2, use the exp form here instead. */
1677 if (logc
1678 && TREE_CODE (logc) == REAL_CST
1679 && exp_decl
1680 && lookup_attribute ("omp declare simd",
1681 DECL_ATTRIBUTES (exp_decl)))
1682 {
1683 cgraph_node *node = cgraph_node::get_create (exp_decl);
1684 if (node->simd_clones == NULL)
1685 {
1686 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1687 || node->definition)
1688 return NULL;
1689 expand_simd_clones (node);
1690 if (node->simd_clones == NULL)
1691 return NULL;
1692 }
1693 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1694 if (!*type_out)
1695 return NULL;
1696 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1697 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1698 append_pattern_def_seq (vinfo, stmt_vinfo, g);
1699 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1700 g = gimple_build_call (exp_decl, 1, def);
1701 gimple_call_set_lhs (g, res);
1702 return g;
1703 }
1704 }
1705
1706 return NULL;
1707 }
1708
1709 /* We now have a pow or powi builtin function call with a constant
1710 exponent. */
1711
1712 /* Catch squaring. */
1713 if ((tree_fits_shwi_p (exp)
1714 && tree_to_shwi (exp) == 2)
1715 || (TREE_CODE (exp) == REAL_CST
1716 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1717 {
1718 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
1719 TREE_TYPE (base), type_out))
1720 return NULL;
1721
1722 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1723 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1724 return stmt;
1725 }
1726
1727 /* Catch square root. */
1728 if (TREE_CODE (exp) == REAL_CST
1729 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1730 {
1731 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1732 if (*type_out
1733 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1734 OPTIMIZE_FOR_SPEED))
1735 {
1736 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1737 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1738 gimple_call_set_lhs (stmt, var);
1739 gimple_call_set_nothrow (stmt, true);
1740 return stmt;
1741 }
1742 }
1743
1744 return NULL;
1745 }
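
/* Minimal illustrations of the cases handled above (names and constants
   are invented; target support for .SQRT and exp SIMD clones is assumed
   where noted):

     y = pow (x, 2.0);     -->   y = x * x;
     y = powi (x, 2);      -->   y = x * x;
     y = pow (x, 0.5);     -->   y = .SQRT (x);        // if IFN_SQRT is supported

   and, with -funsafe-math-optimizations and an exp that provides
   "omp declare simd" clones,

     y = pow (2.5, x);     -->   t = x * C;            // C = log (2.5), folded
                                 y = exp (t);          */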
1746
1747
1748 /* Function vect_recog_widen_sum_pattern
1749
1750 Try to find the following pattern:
1751
1752 type x_t;
1753 TYPE x_T, sum = init;
1754 loop:
1755 sum_0 = phi <init, sum_1>
1756 S1 x_t = *p;
1757 S2 x_T = (TYPE) x_t;
1758 S3 sum_1 = x_T + sum_0;
1759
1760 where type 'TYPE' is at least double the size of type 'type', i.e. we're
1761 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1762 a special case of a reduction computation.
1763
1764 Input:
1765
1766 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1767 when this function is called with S3, the pattern {S2,S3} will be detected.
1768
1769 Output:
1770
1771 * TYPE_OUT: The type of the output of this pattern.
1772
1773 * Return value: A new stmt that will be used to replace the sequence of
1774 stmts that constitute the pattern. In this case it will be:
1775 WIDEN_SUM <x_t, sum_0>
1776
1777 Note: The widening-sum idiom is a widening reduction pattern that is
1778 vectorized without preserving all the intermediate results. It
1779 produces only N/2 (widened) results (by summing up pairs of
1780 intermediate results) rather than all N results. Therefore, we
1781 cannot allow this pattern when we want to get all the results and in
1782 the correct order (as is the case when this computation is in an
1783 inner-loop nested in an outer-loop that is being vectorized). */
1784
1785 static gimple *
1786 vect_recog_widen_sum_pattern (vec_info *vinfo,
1787 stmt_vec_info stmt_vinfo, tree *type_out)
1788 {
1789 gimple *last_stmt = stmt_vinfo->stmt;
1790 tree oprnd0, oprnd1;
1791 tree type;
1792 gimple *pattern_stmt;
1793 tree var;
1794
1795 /* Look for the following pattern
1796 DX = (TYPE) X;
1797 sum_1 = DX + sum_0;
1798 In which DX is at least double the size of X, and sum_1 has been
1799 recognized as a reduction variable.
1800 */
1801
1802 /* Starting from LAST_STMT, follow the defs of its uses in search
1803 of the above pattern. */
1804
1805 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1806 &oprnd0, &oprnd1))
1807 return NULL;
1808
1809 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1810
1811 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1812 we know that oprnd1 is the reduction variable (defined by a loop-header
1813 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1814 Left to check that oprnd0 is defined by a cast from type 'type' to type
1815 'TYPE'. */
1816
1817 vect_unpromoted_value unprom0;
1818 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1819 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1820 return NULL;
1821
1822 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1823
1824 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
1825 unprom0.type, type_out))
1826 return NULL;
1827
1828 var = vect_recog_temp_ssa_var (type, NULL);
1829 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1830
1831 return pattern_stmt;
1832 }
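
/* A concrete (hypothetical) instance of the pattern documented above:

     unsigned char *p;
     unsigned int sum = 0;
     for (int i = 0; i < n; i++)
       sum += p[i];                  // S2: _1 = (unsigned int) x_t
                                     // S3: sum_1 = _1 + sum_0

   Since 'unsigned int' is at least twice as wide as 'unsigned char', the
   reduction is rewritten as

     sum_1 = WIDEN_SUM <x_t, sum_0>;

   assuming the target provides the corresponding widening-sum optab for
   the chosen vector type.  */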
1833
1834 /* Function vect_recog_bitfield_ref_pattern
1835
1836 Try to find the following pattern:
1837
1838 bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
1839 result = (type_out) bf_value;
1840
1841 where type_out is a non-bitfield type, that is to say, its precision matches
1842 2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
1843
1844 Input:
1845
1846 * STMT_VINFO: The stmt from which the pattern search begins.
1847 here it starts with:
1848 result = (type_out) bf_value;
1849
1850 Output:
1851
1852 * TYPE_OUT: The vector type of the output of this pattern.
1853
1854 * Return value: A new stmt that will be used to replace the sequence of
1855 stmts that constitute the pattern. If the precision of type_out is bigger
1856 than the precision of the type of bf_value, we perform the widening before
1857 the shifting, since the new precision will be large enough to shift the value, and moving
1858 widening operations up the statement chain enables the generation of
1859 widening loads. If we are widening and the operation after the pattern is
1860 an addition then we mask first and shift later, to enable the generation of
1861 shifting adds. In the case of narrowing we will always mask first, shift
1862 last and then perform a narrowing operation. This will enable the
1863 generation of narrowing shifts.
1864
1865 Widening with mask first, shift later:
1866 container = (type_out) container;
1867 masked = container & (((1 << bitsize) - 1) << bitpos);
1868 result = masked >> bitpos;
1869
1870 Widening with shift first, mask last:
1871 container = (type_out) container;
1872 shifted = container >> bitpos;
1873 result = shifted & ((1 << bitsize) - 1);
1874
1875 Narrowing:
1876 masked = container & (((1 << bitsize) - 1) << bitpos);
1877 result = masked >> bitpos;
1878 result = (type_out) result;
1879
1880 The shifting is optional; it is needed only when bitpos != 0.
1881
1882 */
1883
1884 static gimple *
1885 vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
1886 tree *type_out)
1887 {
1888 gassign *first_stmt = dyn_cast <gassign *> (stmt_info->stmt);
1889
1890 if (!first_stmt)
1891 return NULL;
1892
1893 gassign *bf_stmt;
1894 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (first_stmt))
1895 && TREE_CODE (gimple_assign_rhs1 (first_stmt)) == SSA_NAME)
1896 {
1897 gimple *second_stmt
1898 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (first_stmt));
1899 bf_stmt = dyn_cast <gassign *> (second_stmt);
1900 if (!bf_stmt
1901 || gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF)
1902 return NULL;
1903 }
1904 else
1905 return NULL;
1906
1907 tree bf_ref = gimple_assign_rhs1 (bf_stmt);
1908 tree container = TREE_OPERAND (bf_ref, 0);
1909
1910 if (!bit_field_offset (bf_ref).is_constant ()
1911 || !bit_field_size (bf_ref).is_constant ()
1912 || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container))))
1913 return NULL;
1914
1915 if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref))
1916 || !INTEGRAL_TYPE_P (TREE_TYPE (container))
1917 || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode)
1918 return NULL;
1919
1920 gimple *use_stmt, *pattern_stmt;
1921 use_operand_p use_p;
1922 tree ret = gimple_assign_lhs (first_stmt);
1923 tree ret_type = TREE_TYPE (ret);
1924 bool shift_first = true;
1925 tree container_type = TREE_TYPE (container);
1926 tree vectype = get_vectype_for_scalar_type (vinfo, container_type);
1927
1928 /* We move the conversion earlier if the loaded type is smaller than the
1929 return type to enable the use of widening loads. */
1930 if (TYPE_PRECISION (TREE_TYPE (container)) < TYPE_PRECISION (ret_type)
1931 && !useless_type_conversion_p (TREE_TYPE (container), ret_type))
1932 {
1933 pattern_stmt
1934 = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
1935 NOP_EXPR, container);
1936 container = gimple_get_lhs (pattern_stmt);
1937 container_type = TREE_TYPE (container);
1938 vectype = get_vectype_for_scalar_type (vinfo, container_type);
1939 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
1940 }
1941 else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type))
1942 /* If we are doing the conversion last then also delay the shift as we may
1943 be able to combine the shift and conversion in certain cases. */
1944 shift_first = false;
1945
1946 /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
1947 PLUS_EXPR then do the shift last as some targets can combine the shift and
1948 add into a single instruction. */
1949 if (single_imm_use (gimple_assign_lhs (first_stmt), &use_p, &use_stmt))
1950 {
1951 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
1952 && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR)
1953 shift_first = false;
1954 }
1955
1956 unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
1957 unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
1958 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
1959 if (BYTES_BIG_ENDIAN)
1960 shift_n = prec - shift_n - mask_width;
1961
1962 /* If we don't have to shift we only generate the mask, so just fix the
1963 code-path to shift_first. */
1964 if (shift_n == 0)
1965 shift_first = true;
1966
1967 tree result;
1968 if (shift_first)
1969 {
1970 tree shifted = container;
1971 if (shift_n)
1972 {
1973 pattern_stmt
1974 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
1975 RSHIFT_EXPR, container,
1976 build_int_cst (sizetype, shift_n));
1977 shifted = gimple_assign_lhs (pattern_stmt);
1978 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
1979 }
1980
1981 tree mask = wide_int_to_tree (container_type,
1982 wi::mask (mask_width, false, prec));
1983
1984 pattern_stmt
1985 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
1986 BIT_AND_EXPR, shifted, mask);
1987 result = gimple_assign_lhs (pattern_stmt);
1988 }
1989 else
1990 {
1991 tree mask = wide_int_to_tree (container_type,
1992 wi::shifted_mask (shift_n, mask_width,
1993 false, prec));
1994 pattern_stmt
1995 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
1996 BIT_AND_EXPR, container, mask);
1997 tree masked = gimple_assign_lhs (pattern_stmt);
1998
1999 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2000 pattern_stmt
2001 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2002 RSHIFT_EXPR, masked,
2003 build_int_cst (sizetype, shift_n));
2004 result = gimple_assign_lhs (pattern_stmt);
2005 }
2006
2007 if (!useless_type_conversion_p (TREE_TYPE (result), ret_type))
2008 {
2009 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2010 pattern_stmt
2011 = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
2012 NOP_EXPR, result);
2013 }
2014
2015 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2016 vect_pattern_detected ("bitfield_ref pattern", stmt_info->stmt);
2017
2018 return pattern_stmt;
2019 }
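
/* A hypothetical little-endian example (field layout, names and constants
   are invented): after if-conversion has lowered a bitfield read of a
   5-bit field at bit offset 3 in a 32-bit representative rep_1, the IL

     _1 = BIT_FIELD_REF <rep_1, 5, 3>;
     res_2 = (unsigned int) _1;

   is rewritten, the container already having the return type, into

     patt_3 = rep_1 >> 3;
     patt_4 = patt_3 & 31;            // (1 << 5) - 1

   with patt_4 replacing res_2's definition.  Had res_2 been wider than
   the container, the conversion would have been emitted first so that a
   widening load can be used, as described above.  */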
2020
2021 /* Function vect_recog_bit_insert_pattern
2022
2023 Try to find the following pattern:
2024
2025 written = BIT_INSERT_EXPR (container, value, bitpos);
2026
2027 Input:
2028
2029 * STMT_VINFO: The stmt we want to replace.
2030
2031 Output:
2032
2033 * TYPE_OUT: The vector type of the output of this pattern.
2034
2035 * Return value: A new stmt that will be used to replace the sequence of
2036 stmts that constitute the pattern. In this case it will be:
2037 value = (container_type) value; // Convert value to the container's type.
2038 shifted = value << bitpos; // Shift value into place.
2039 masked = shifted & (mask << bitpos); // Mask off the non-relevant bits in
2040 // the 'to-write' value.
2041 cleared = container & ~(mask << bitpos); // Clear the bits in the container
2042 // that we are going to
2043 // write to.
2044 written = cleared | masked; // Write bits.
2045
2046
2047 where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask to keep the number of
2048 bits corresponding to the real size of the bitfield value we are writing to.
2049 The shifting is optional; it is needed only when bitpos != 0.
2050
2051 */
2052
2053 static gimple *
2054 vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2055 tree *type_out)
2056 {
2057 gassign *bf_stmt = dyn_cast <gassign *> (stmt_info->stmt);
2058 if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_INSERT_EXPR)
2059 return NULL;
2060
2061 tree container = gimple_assign_rhs1 (bf_stmt);
2062 tree value = gimple_assign_rhs2 (bf_stmt);
2063 tree shift = gimple_assign_rhs3 (bf_stmt);
2064
2065 tree bf_type = TREE_TYPE (value);
2066 tree container_type = TREE_TYPE (container);
2067
2068 if (!INTEGRAL_TYPE_P (container_type)
2069 || !tree_fits_uhwi_p (TYPE_SIZE (container_type)))
2070 return NULL;
2071
2072 gimple *pattern_stmt;
2073
2074 vect_unpromoted_value unprom;
2075 unprom.set_op (value, vect_internal_def);
2076 value = vect_convert_input (vinfo, stmt_info, container_type, &unprom,
2077 get_vectype_for_scalar_type (vinfo,
2078 container_type));
2079
2080 unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);
2081 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2082 unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift);
2083 if (BYTES_BIG_ENDIAN)
2084 {
2085 shift_n = prec - shift_n - mask_width;
2086 shift = build_int_cst (TREE_TYPE (shift), shift_n);
2087 }
2088
2089 if (!useless_type_conversion_p (TREE_TYPE (value), container_type))
2090 {
2091 pattern_stmt =
2092 gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2093 NOP_EXPR, value);
2094 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2095 value = gimple_get_lhs (pattern_stmt);
2096 }
2097
2098 /* Shift VALUE into place. */
2099 tree shifted = value;
2100 if (shift_n)
2101 {
2102 gimple_seq stmts = NULL;
2103 shifted
2104 = gimple_build (&stmts, LSHIFT_EXPR, container_type, value, shift);
2105 if (!gimple_seq_empty_p (stmts))
2106 append_pattern_def_seq (vinfo, stmt_info,
2107 gimple_seq_first_stmt (stmts));
2108 }
2109
2110 tree mask_t
2111 = wide_int_to_tree (container_type,
2112 wi::shifted_mask (shift_n, mask_width, false, prec));
2113
2114 /* Clear bits we don't want to write back from SHIFTED. */
2115 gimple_seq stmts = NULL;
2116 tree masked = gimple_build (&stmts, BIT_AND_EXPR, container_type, shifted,
2117 mask_t);
2118 if (!gimple_seq_empty_p (stmts))
2119 {
2120 pattern_stmt = gimple_seq_first_stmt (stmts);
2121 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2122 }
2123
2124 /* Mask off the bits in the container that we are to write to. */
2125 mask_t = wide_int_to_tree (container_type,
2126 wi::shifted_mask (shift_n, mask_width, true, prec));
2127 tree cleared = vect_recog_temp_ssa_var (container_type);
2128 pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t);
2129 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2130
2131 /* Write MASKED into CLEARED. */
2132 pattern_stmt
2133 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2134 BIT_IOR_EXPR, cleared, masked);
2135
2136 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2137 vect_pattern_detected ("bit_insert pattern", stmt_info->stmt);
2138
2139 return pattern_stmt;
2140 }
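
/* A hypothetical little-endian example (layout and constants invented):
   writing a 5-bit value v_2 at bit offset 3 of a 32-bit container,

     _1 = BIT_INSERT_EXPR <old_3, v_2, 3>;

   expands into the sequence described above:

     patt_4 = (unsigned int) v_2;
     patt_5 = patt_4 << 3;
     patt_6 = patt_5 & 248;           // ((1 << 5) - 1) << 3
     patt_7 = old_3 & 4294967047;     // ~(((1 << 5) - 1) << 3)
     _1 = patt_7 | patt_6;  */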
2141
2142
2143 /* Recognize cases in which an operation is performed in one type WTYPE
2144 but could be done more efficiently in a narrower type NTYPE. For example,
2145 if we have:
2146
2147 ATYPE a; // narrower than NTYPE
2148 BTYPE b; // narrower than NTYPE
2149 WTYPE aw = (WTYPE) a;
2150 WTYPE bw = (WTYPE) b;
2151 WTYPE res = aw + bw; // only uses of aw and bw
2152
2153 then it would be more efficient to do:
2154
2155 NTYPE an = (NTYPE) a;
2156 NTYPE bn = (NTYPE) b;
2157 NTYPE resn = an + bn;
2158 WTYPE res = (WTYPE) resn;
2159
2160 Other situations include things like:
2161
2162 ATYPE a; // NTYPE or narrower
2163 WTYPE aw = (WTYPE) a;
2164 WTYPE res = aw + b;
2165
2166 when only "(NTYPE) res" is significant. In that case it's more efficient
2167 to truncate "b" and do the operation on NTYPE instead:
2168
2169 NTYPE an = (NTYPE) a;
2170 NTYPE bn = (NTYPE) b; // truncation
2171 NTYPE resn = an + bn;
2172 WTYPE res = (WTYPE) resn;
2173
2174 All users of "res" should then use "resn" instead, making the final
2175 statement dead (not marked as relevant). The final statement is still
2176 needed to maintain the type correctness of the IR.
2177
2178 vect_determine_precisions has already determined the minimum
2179 precision of the operation and the minimum precision required
2180 by users of the result. */
2181
2182 static gimple *
2183 vect_recog_over_widening_pattern (vec_info *vinfo,
2184 stmt_vec_info last_stmt_info, tree *type_out)
2185 {
2186 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2187 if (!last_stmt)
2188 return NULL;
2189
2190 /* See whether we have found that this operation can be done on a
2191 narrower type without changing its semantics. */
2192 unsigned int new_precision = last_stmt_info->operation_precision;
2193 if (!new_precision)
2194 return NULL;
2195
2196 tree lhs = gimple_assign_lhs (last_stmt);
2197 tree type = TREE_TYPE (lhs);
2198 tree_code code = gimple_assign_rhs_code (last_stmt);
2199
2200 /* Punt for reductions where we don't handle the type conversions. */
2201 if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
2202 return NULL;
2203
2204 /* Keep the first operand of a COND_EXPR as-is: only the other two
2205 operands are interesting. */
2206 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
2207
2208 /* Check the operands. */
2209 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
2210 auto_vec <vect_unpromoted_value, 3> unprom (nops);
2211 unprom.quick_grow (nops);
2212 unsigned int min_precision = 0;
2213 bool single_use_p = false;
2214 for (unsigned int i = 0; i < nops; ++i)
2215 {
2216 tree op = gimple_op (last_stmt, first_op + i);
2217 if (TREE_CODE (op) == INTEGER_CST)
2218 unprom[i].set_op (op, vect_constant_def);
2219 else if (TREE_CODE (op) == SSA_NAME)
2220 {
2221 bool op_single_use_p = true;
2222 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
2223 &op_single_use_p))
2224 return NULL;
2225 /* If:
2226
2227 (1) N bits of the result are needed;
2228 (2) all inputs are widened from M<N bits; and
2229 (3) one operand OP is a single-use SSA name
2230
2231 we can shift the M->N widening from OP to the output
2232 without changing the number or type of extensions involved.
2233 This then reduces the number of copies of STMT_INFO.
2234
2235 If instead of (3) more than one operand is a single-use SSA name,
2236 shifting the extension to the output is even more of a win.
2237
2238 If instead:
2239
2240 (1) N bits of the result are needed;
2241 (2) one operand OP2 is widened from M2<N bits;
2242 (3) another operand OP1 is widened from M1<M2 bits; and
2243 (4) both OP1 and OP2 are single-use
2244
2245 the choice is between:
2246
2247 (a) truncating OP2 to M1, doing the operation on M1,
2248 and then widening the result to N
2249
2250 (b) widening OP1 to M2, doing the operation on M2, and then
2251 widening the result to N
2252
2253 Both shift the M2->N widening of the inputs to the output.
2254 (a) additionally shifts the M1->M2 widening to the output;
2255 it requires fewer copies of STMT_INFO but requires an extra
2256 M2->M1 truncation.
2257
2258 Which is better will depend on the complexity and cost of
2259 STMT_INFO, which is hard to predict at this stage. However,
2260 a clear tie-breaker in favor of (b) is the fact that the
2261 truncation in (a) increases the length of the operation chain.
2262
2263 If instead of (4) only one of OP1 or OP2 is single-use,
2264 (b) is still a win over doing the operation in N bits:
2265 it still shifts the M2->N widening on the single-use operand
2266 to the output and reduces the number of STMT_INFO copies.
2267
2268 If neither operand is single-use then operating on fewer than
2269 N bits might lead to more extensions overall. Whether it does
2270 or not depends on global information about the vectorization
2271 region, and whether that's a good trade-off would again
2272 depend on the complexity and cost of the statements involved,
2273 as well as things like register pressure that are not normally
2274 modelled at this stage. We therefore ignore these cases
2275 and just optimize the clear single-use wins above.
2276
2277 Thus we take the maximum precision of the unpromoted operands
2278 and record whether any operand is single-use. */
2279 if (unprom[i].dt == vect_internal_def)
2280 {
2281 min_precision = MAX (min_precision,
2282 TYPE_PRECISION (unprom[i].type));
2283 single_use_p |= op_single_use_p;
2284 }
2285 }
2286 else
2287 return NULL;
2288 }
2289
2290 /* Although the operation could be done in operation_precision, we have
2291 to balance that against introducing extra truncations or extensions.
2292 Calculate the minimum precision that can be handled efficiently.
2293
2294 The loop above determined that the operation could be handled
2295 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
2296 extension from the inputs to the output without introducing more
2297 instructions, and would reduce the number of instructions required
2298 for STMT_INFO itself.
2299
2300 vect_determine_precisions has also determined that the result only
2301 needs min_output_precision bits. Truncating by a factor of N times
2302 requires a tree of N - 1 instructions, so if TYPE is N times wider
2303 than min_output_precision, doing the operation in TYPE and truncating
2304 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
2305 In contrast:
2306
2307 - truncating the input to a unary operation and doing the operation
2308 in the new type requires at most N - 1 + 1 = N instructions per
2309 output vector
2310
2311 - doing the same for a binary operation requires at most
2312 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
2313
2314 Both unary and binary operations require fewer instructions than
2315 this if the operands were extended from a suitable truncated form.
2316 Thus there is usually nothing to lose by doing operations in
2317 min_output_precision bits, but there can be something to gain. */
2318 if (!single_use_p)
2319 min_precision = last_stmt_info->min_output_precision;
2320 else
2321 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
2322
2323 /* Apply the minimum efficient precision we just calculated. */
2324 if (new_precision < min_precision)
2325 new_precision = min_precision;
2326 new_precision = vect_element_precision (new_precision);
2327 if (new_precision >= TYPE_PRECISION (type))
2328 return NULL;
2329
2330 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
2331
2332 *type_out = get_vectype_for_scalar_type (vinfo, type);
2333 if (!*type_out)
2334 return NULL;
2335
2336 /* We've found a viable pattern. Get the new type of the operation. */
2337 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
2338 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
2339
2340 /* If we're truncating an operation, we need to make sure that we
2341 don't introduce new undefined overflow. The codes tested here are
2342 a subset of those accepted by vect_truncatable_operation_p. */
2343 tree op_type = new_type;
2344 if (TYPE_OVERFLOW_UNDEFINED (new_type)
2345 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
2346 op_type = build_nonstandard_integer_type (new_precision, true);
2347
2348 /* We specifically don't check here whether the target supports the
2349 new operation, since it might be something that a later pattern
2350 wants to rewrite anyway. If targets have a minimum element size
2351 for some optabs, we should pattern-match smaller ops to larger ops
2352 where beneficial. */
2353 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2354 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
2355 if (!new_vectype || !op_vectype)
2356 return NULL;
2357
2358 if (dump_enabled_p ())
2359 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
2360 type, new_type);
2361
2362 /* Calculate the rhs operands for an operation on OP_TYPE. */
2363 tree ops[3] = {};
2364 for (unsigned int i = 1; i < first_op; ++i)
2365 ops[i - 1] = gimple_op (last_stmt, i);
2366 vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
2367 op_type, &unprom[0], op_vectype);
2368
2369 /* Use the operation to produce a result of type OP_TYPE. */
2370 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
2371 gimple *pattern_stmt = gimple_build_assign (new_var, code,
2372 ops[0], ops[1], ops[2]);
2373 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2374
2375 if (dump_enabled_p ())
2376 dump_printf_loc (MSG_NOTE, vect_location,
2377 "created pattern stmt: %G", pattern_stmt);
2378
2379 /* Convert back to the original signedness, if OP_TYPE is different
2380 from NEW_TYPE. */
2381 if (op_type != new_type)
2382 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
2383 pattern_stmt, op_vectype);
2384
2385 /* Promote the result to the original type. */
2386 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
2387 pattern_stmt, new_vectype);
2388
2389 return pattern_stmt;
2390 }
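
/* A small hypothetical example of the demotion performed above (all names
   invented).  Given

     _1 = (int) x_4;                  // x_4 and y_5 are unsigned char
     _2 = (int) y_5;
     _3 = _1 + _2;
     res_6 = (unsigned char) _3;

   vect_determine_precisions records that only 8 bits of _3 are needed, so
   the addition is rewritten to be done on unsigned char directly:

     patt_7 = x_4 + y_5;
     patt_8 = (int) patt_7;           // replaces _3; res_6 then uses patt_8

   which lets the loop be vectorized with byte vectors instead of word
   vectors.  */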
2391
2392 /* Recognize the following patterns:
2393
2394 ATYPE a; // narrower than TYPE
2395 BTYPE b; // narrower than TYPE
2396
2397 1) Multiply high with scaling
2398 TYPE res = ((TYPE) a * (TYPE) b) >> c;
2399 Here, c is bitsize (TYPE) / 2 - 1.
2400
2401 2) ... or also with rounding
2402 TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
2403 Here, d is bitsize (TYPE) / 2 - 2.
2404
2405 3) Normal multiply high
2406 TYPE res = ((TYPE) a * (TYPE) b) >> e;
2407 Here, e is bitsize (TYPE) / 2.
2408
2409 where only the bottom half of res is used. */
2410
2411 static gimple *
2412 vect_recog_mulhs_pattern (vec_info *vinfo,
2413 stmt_vec_info last_stmt_info, tree *type_out)
2414 {
2415 /* Check for a right shift. */
2416 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2417 if (!last_stmt
2418 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
2419 return NULL;
2420
2421 /* Check that the shift result is wider than the users of the
2422 result need (i.e. that narrowing would be a natural choice). */
2423 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
2424 unsigned int target_precision
2425 = vect_element_precision (last_stmt_info->min_output_precision);
2426 if (!INTEGRAL_TYPE_P (lhs_type)
2427 || target_precision >= TYPE_PRECISION (lhs_type))
2428 return NULL;
2429
2430 /* Look through any change in sign on the outer shift input. */
2431 vect_unpromoted_value unprom_rshift_input;
2432 tree rshift_input = vect_look_through_possible_promotion
2433 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
2434 if (!rshift_input
2435 || TYPE_PRECISION (TREE_TYPE (rshift_input))
2436 != TYPE_PRECISION (lhs_type))
2437 return NULL;
2438
2439 /* Get the definition of the shift input. */
2440 stmt_vec_info rshift_input_stmt_info
2441 = vect_get_internal_def (vinfo, rshift_input);
2442 if (!rshift_input_stmt_info)
2443 return NULL;
2444 gassign *rshift_input_stmt
2445 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
2446 if (!rshift_input_stmt)
2447 return NULL;
2448
2449 stmt_vec_info mulh_stmt_info;
2450 tree scale_term;
2451 bool rounding_p = false;
2452
2453 /* Check for the presence of the rounding term. */
2454 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
2455 {
2456 /* Check that the outer shift was by 1. */
2457 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
2458 return NULL;
2459
2460 /* Check that the second operand of the PLUS_EXPR is 1. */
2461 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
2462 return NULL;
2463
2464 /* Look through any change in sign on the addition input. */
2465 vect_unpromoted_value unprom_plus_input;
2466 tree plus_input = vect_look_through_possible_promotion
2467 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
2468 if (!plus_input
2469 || TYPE_PRECISION (TREE_TYPE (plus_input))
2470 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
2471 return NULL;
2472
2473 /* Get the definition of the multiply-high-scale part. */
2474 stmt_vec_info plus_input_stmt_info
2475 = vect_get_internal_def (vinfo, plus_input);
2476 if (!plus_input_stmt_info)
2477 return NULL;
2478 gassign *plus_input_stmt
2479 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
2480 if (!plus_input_stmt
2481 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
2482 return NULL;
2483
2484 /* Look through any change in sign on the scaling input. */
2485 vect_unpromoted_value unprom_scale_input;
2486 tree scale_input = vect_look_through_possible_promotion
2487 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
2488 if (!scale_input
2489 || TYPE_PRECISION (TREE_TYPE (scale_input))
2490 != TYPE_PRECISION (TREE_TYPE (plus_input)))
2491 return NULL;
2492
2493 /* Get the definition of the multiply-high part. */
2494 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
2495 if (!mulh_stmt_info)
2496 return NULL;
2497
2498 /* Get the scaling term. */
2499 scale_term = gimple_assign_rhs2 (plus_input_stmt);
2500 rounding_p = true;
2501 }
2502 else
2503 {
2504 mulh_stmt_info = rshift_input_stmt_info;
2505 scale_term = gimple_assign_rhs2 (last_stmt);
2506 }
2507
2508 /* Check that the scaling factor is constant. */
2509 if (TREE_CODE (scale_term) != INTEGER_CST)
2510 return NULL;
2511
2512 /* Check whether the scaling input term can be seen as two widened
2513 inputs multiplied together. */
2514 vect_unpromoted_value unprom_mult[2];
2515 tree new_type;
2516 unsigned int nops
2517 = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
2518 false, 2, unprom_mult, &new_type);
2519 if (nops != 2)
2520 return NULL;
2521
2522 /* Adjust output precision. */
2523 if (TYPE_PRECISION (new_type) < target_precision)
2524 new_type = build_nonstandard_integer_type
2525 (target_precision, TYPE_UNSIGNED (new_type));
2526
2527 unsigned mult_precision = TYPE_PRECISION (new_type);
2528 internal_fn ifn;
2529 /* Check that the scaling factor is the expected one. Note that we
2530 check against mult_precision (the precision actually used by the
2531 internal function) rather than target_precision. */
2532 if (rounding_p)
2533 {
2534 /* Check pattern 2). */
2535 if (wi::to_widest (scale_term) + mult_precision + 2
2536 != TYPE_PRECISION (lhs_type))
2537 return NULL;
2538
2539 ifn = IFN_MULHRS;
2540 }
2541 else
2542 {
2543 /* Check for pattern 1). */
2544 if (wi::to_widest (scale_term) + mult_precision + 1
2545 == TYPE_PRECISION (lhs_type))
2546 ifn = IFN_MULHS;
2547 /* Check for pattern 3). */
2548 else if (wi::to_widest (scale_term) + mult_precision
2549 == TYPE_PRECISION (lhs_type))
2550 ifn = IFN_MULH;
2551 else
2552 return NULL;
2553 }
2554
2555 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
2556
2557 /* Check for target support. */
2558 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2559 if (!new_vectype
2560 || !direct_internal_fn_supported_p
2561 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2562 return NULL;
2563
2564 /* The IR requires a valid vector type for the cast result, even though
2565 it's likely to be discarded. */
2566 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2567 if (!*type_out)
2568 return NULL;
2569
2570 /* Generate the IFN_MULHRS call. */
2571 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2572 tree new_ops[2];
2573 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2574 unprom_mult, new_vectype);
2575 gcall *mulhrs_stmt
2576 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
2577 gimple_call_set_lhs (mulhrs_stmt, new_var);
2578 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
2579
2580 if (dump_enabled_p ())
2581 dump_printf_loc (MSG_NOTE, vect_location,
2582 "created pattern stmt: %G", (gimple *) mulhrs_stmt);
2583
2584 return vect_convert_output (vinfo, last_stmt_info, lhs_type,
2585 mulhrs_stmt, new_vectype);
2586 }
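
/* Hypothetical 16-bit illustrations of the shapes above (types and target
   support are assumed):

     res = (int16_t) (((int32_t) a * (int32_t) b) >> 15);   // pattern 1)
     res = (int16_t) (((int32_t) a * (int32_t) b) >> 16);   // pattern 3)

   become, in effect,

     res = .MULHS (a, b);        resp.        res = .MULH (a, b);

   on targets that support the internal function for the 16-bit vector
   type, and the rounding form 2) maps to .MULHRS in the same way.  */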
2587
2588 /* Recognize the patterns:
2589
2590 ATYPE a; // narrower than TYPE
2591 BTYPE b; // narrower than TYPE
2592 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
2593 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
2594
2595 where only the bottom half of avg is used. Try to transform them into:
2596
2597 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
2598 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
2599
2600 followed by:
2601
2602 TYPE avg = (TYPE) avg';
2603
2604 where NTYPE is no wider than half of TYPE. Since only the bottom half
2605 of avg is used, all or part of the cast of avg' should become redundant.
2606
2607 If there is no target support available, generate code to distribute rshift
2608 over plus and add a carry. */
2609
2610 static gimple *
2611 vect_recog_average_pattern (vec_info *vinfo,
2612 stmt_vec_info last_stmt_info, tree *type_out)
2613 {
2614 /* Check for a shift right by one bit. */
2615 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2616 if (!last_stmt
2617 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
2618 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
2619 return NULL;
2620
2621 /* Check that the shift result is wider than the users of the
2622 result need (i.e. that narrowing would be a natural choice). */
2623 tree lhs = gimple_assign_lhs (last_stmt);
2624 tree type = TREE_TYPE (lhs);
2625 unsigned int target_precision
2626 = vect_element_precision (last_stmt_info->min_output_precision);
2627 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
2628 return NULL;
2629
2630 /* Look through any change in sign on the shift input. */
2631 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
2632 vect_unpromoted_value unprom_plus;
2633 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
2634 &unprom_plus);
2635 if (!rshift_rhs
2636 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
2637 return NULL;
2638
2639 /* Get the definition of the shift input. */
2640 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
2641 if (!plus_stmt_info)
2642 return NULL;
2643
2644 /* Check whether the shift input can be seen as a tree of additions on
2645 2 or 3 widened inputs.
2646
2647 Note that the pattern should be a win even if the result of one or
2648 more additions is reused elsewhere: if the pattern matches, we'd be
2649 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
2650 internal_fn ifn = IFN_AVG_FLOOR;
2651 vect_unpromoted_value unprom[3];
2652 tree new_type;
2653 unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
2654 WIDEN_PLUS_EXPR, false, 3,
2655 unprom, &new_type);
2656 if (nops == 0)
2657 return NULL;
2658 if (nops == 3)
2659 {
2660 /* Check that one operand is 1. */
2661 unsigned int i;
2662 for (i = 0; i < 3; ++i)
2663 if (integer_onep (unprom[i].op))
2664 break;
2665 if (i == 3)
2666 return NULL;
2667 /* Throw away the 1 operand and keep the other two. */
2668 if (i < 2)
2669 unprom[i] = unprom[2];
2670 ifn = IFN_AVG_CEIL;
2671 }
2672
2673 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
2674
2675 /* We know that:
2676
2677 (a) the operation can be viewed as:
2678
2679 TYPE widened0 = (TYPE) UNPROM[0];
2680 TYPE widened1 = (TYPE) UNPROM[1];
2681 TYPE tmp1 = widened0 + widened1 {+ 1};
2682 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
2683
2684 (b) the first two statements are equivalent to:
2685
2686 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2687 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2688
2689 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2690 where sensible;
2691
2692 (d) all the operations can be performed correctly at twice the width of
2693 NEW_TYPE, due to the nature of the average operation; and
2694
2695 (e) users of the result of the right shift need only TARGET_PRECISION
2696 bits, where TARGET_PRECISION is no more than half of TYPE's
2697 precision.
2698
2699 Under these circumstances, the only situation in which NEW_TYPE
2700 could be narrower than TARGET_PRECISION is if widened0, widened1
2701 and an addition result are all used more than once. Thus we can
2702 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2703 as "free", whereas widening the result of the average instruction
2704 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
2705 therefore better not to go narrower than TARGET_PRECISION. */
2706 if (TYPE_PRECISION (new_type) < target_precision)
2707 new_type = build_nonstandard_integer_type (target_precision,
2708 TYPE_UNSIGNED (new_type));
2709
2710 /* Check for target support. */
2711 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2712 if (!new_vectype)
2713 return NULL;
2714
2715 bool fallback_p = false;
2716
2717 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2718 ;
2719 else if (TYPE_UNSIGNED (new_type)
2720 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
2721 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
2722 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
2723 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
2724 fallback_p = true;
2725 else
2726 return NULL;
2727
2728 /* The IR requires a valid vector type for the cast result, even though
2729 it's likely to be discarded. */
2730 *type_out = get_vectype_for_scalar_type (vinfo, type);
2731 if (!*type_out)
2732 return NULL;
2733
2734 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2735 tree new_ops[2];
2736 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2737 unprom, new_vectype);
2738
2739 if (fallback_p)
2740 {
2741 /* As a fallback, generate code for the following sequence:
2742
2743 shifted_op0 = new_ops[0] >> 1;
2744 shifted_op1 = new_ops[1] >> 1;
2745 sum_of_shifted = shifted_op0 + shifted_op1;
2746 unmasked_carry = new_ops[0] and/or new_ops[1];
2747 carry = unmasked_carry & 1;
2748 new_var = sum_of_shifted + carry;
2749 */
2750
2751 tree one_cst = build_one_cst (new_type);
2752 gassign *g;
2753
2754 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
2755 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
2756 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2757
2758 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
2759 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
2760 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2761
2762 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
2763 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
2764 shifted_op0, shifted_op1);
2765 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2766
2767 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
2768 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
2769 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
2770 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2771
2772 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
2773 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
2774 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2775
2776 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
2777 return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
2778 }
2779
2780 /* Generate the IFN_AVG* call. */
2781 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2782 new_ops[1]);
2783 gimple_call_set_lhs (average_stmt, new_var);
2784 gimple_set_location (average_stmt, gimple_location (last_stmt));
2785
2786 if (dump_enabled_p ())
2787 dump_printf_loc (MSG_NOTE, vect_location,
2788 "created pattern stmt: %G", (gimple *) average_stmt);
2789
2790 return vect_convert_output (vinfo, last_stmt_info,
2791 type, average_stmt, new_vectype);
2792 }
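
/* A hypothetical unsigned char example (names invented):

     res = (unsigned char) (((int) a_4 + (int) b_5 + 1) >> 1);

   becomes, when the target provides IFN_AVG_CEIL for the byte vector type,

     patt_6 = .AVG_CEIL (a_4, b_5);

   followed by a cast back to the wider type that the later narrowing
   makes redundant.  Without direct support, the fallback above emits

     patt_1 = a_4 >> 1;
     patt_2 = b_5 >> 1;
     patt_3 = patt_1 + patt_2;
     patt_4 = a_4 | b_5;              // BIT_AND_EXPR for .AVG_FLOOR
     patt_5 = patt_4 & 1;
     patt_6 = patt_3 + patt_5;  */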
2793
2794 /* Recognize cases in which the input to a cast is wider than its
2795 output, and the input is fed by a widening operation. Fold this
2796 by removing the unnecessary intermediate widening. E.g.:
2797
2798 unsigned char a;
2799 unsigned int b = (unsigned int) a;
2800 unsigned short c = (unsigned short) b;
2801
2802 -->
2803
2804 unsigned short c = (unsigned short) a;
2805
2806 Although this is rare in input IR, it is an expected side-effect
2807 of the over-widening pattern above.
2808
2809 This is beneficial also for integer-to-float conversions, if the
2810 widened integer has more bits than the float, and if the unwidened
2811 input doesn't. */
2812
2813 static gimple *
2814 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
2815 stmt_vec_info last_stmt_info, tree *type_out)
2816 {
2817 /* Check for a cast, including an integer-to-float conversion. */
2818 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2819 if (!last_stmt)
2820 return NULL;
2821 tree_code code = gimple_assign_rhs_code (last_stmt);
2822 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2823 return NULL;
2824
2825 /* Make sure that the rhs is a scalar with a natural bitsize. */
2826 tree lhs = gimple_assign_lhs (last_stmt);
2827 if (!lhs)
2828 return NULL;
2829 tree lhs_type = TREE_TYPE (lhs);
2830 scalar_mode lhs_mode;
2831 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2832 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2833 return NULL;
2834
2835 /* Check for a narrowing operation (from a vector point of view). */
2836 tree rhs = gimple_assign_rhs1 (last_stmt);
2837 tree rhs_type = TREE_TYPE (rhs);
2838 if (!INTEGRAL_TYPE_P (rhs_type)
2839 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2840 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2841 return NULL;
2842
2843 /* Try to find an unpromoted input. */
2844 vect_unpromoted_value unprom;
2845 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2846 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2847 return NULL;
2848
2849 /* If the bits above RHS_TYPE matter, make sure that they're the
2850 same when extending from UNPROM as they are when extending from RHS. */
2851 if (!INTEGRAL_TYPE_P (lhs_type)
2852 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2853 return NULL;
2854
2855 /* We can get the same result by casting UNPROM directly, to avoid
2856 the unnecessary widening and narrowing. */
2857 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2858
2859 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2860 if (!*type_out)
2861 return NULL;
2862
2863 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2864 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2865 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2866
2867 return pattern_stmt;
2868 }
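
/* The integer-to-float case mentioned above would, for instance
   (hypothetical types), turn

     unsigned char a;
     unsigned long long b = a;
     float c = (float) b;

   into

     float c = (float) a;

   because the 64-bit intermediate is wider than float's 32-bit mode while
   the original unsigned char is not, so converting the unpromoted value
   directly gives the same result.  */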
2869
2870 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2871 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
2872
2873 static gimple *
2874 vect_recog_widen_shift_pattern (vec_info *vinfo,
2875 stmt_vec_info last_stmt_info, tree *type_out)
2876 {
2877 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
2878 LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
2879 "vect_recog_widen_shift_pattern");
2880 }
2881
2882 /* Detect a rotate pattern that wouldn't be otherwise vectorized:
2883
2884 type a_t, b_t, c_t;
2885
2886 S0 a_t = b_t r<< c_t;
2887
2888 Input/Output:
2889
2890 * STMT_VINFO: The stmt from which the pattern search begins,
2891 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
2892 with a sequence:
2893
2894 S1 d_t = -c_t;
2895 S2 e_t = d_t & (B - 1);
2896 S3 f_t = b_t << c_t;
2897 S4 g_t = b_t >> e_t;
2898 S0 a_t = f_t | g_t;
2899
2900 where B is the element bitsize of type.
2901
2902 Output:
2903
2904 * TYPE_OUT: The type of the output of this pattern.
2905
2906 * Return value: A new stmt that will be used to replace the rotate
2907 S0 stmt. */
2908
2909 static gimple *
2910 vect_recog_rotate_pattern (vec_info *vinfo,
2911 stmt_vec_info stmt_vinfo, tree *type_out)
2912 {
2913 gimple *last_stmt = stmt_vinfo->stmt;
2914 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2915 gimple *pattern_stmt, *def_stmt;
2916 enum tree_code rhs_code;
2917 enum vect_def_type dt;
2918 optab optab1, optab2;
2919 edge ext_def = NULL;
2920 bool bswap16_p = false;
2921
2922 if (is_gimple_assign (last_stmt))
2923 {
2924 rhs_code = gimple_assign_rhs_code (last_stmt);
2925 switch (rhs_code)
2926 {
2927 case LROTATE_EXPR:
2928 case RROTATE_EXPR:
2929 break;
2930 default:
2931 return NULL;
2932 }
2933
2934 lhs = gimple_assign_lhs (last_stmt);
2935 oprnd0 = gimple_assign_rhs1 (last_stmt);
2936 type = TREE_TYPE (oprnd0);
2937 oprnd1 = gimple_assign_rhs2 (last_stmt);
2938 }
2939 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2940 {
2941 /* __builtin_bswap16 (x) is another form of x r>> 8.
2942 The vectorizer has bswap support, but only if the argument isn't
2943 promoted. */
2944 lhs = gimple_call_lhs (last_stmt);
2945 oprnd0 = gimple_call_arg (last_stmt, 0);
2946 type = TREE_TYPE (oprnd0);
2947 if (!lhs
2948 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2949 || TYPE_PRECISION (type) <= 16
2950 || TREE_CODE (oprnd0) != SSA_NAME
2951 || BITS_PER_UNIT != 8)
2952 return NULL;
2953
2954 stmt_vec_info def_stmt_info;
2955 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2956 return NULL;
2957
2958 if (dt != vect_internal_def)
2959 return NULL;
2960
2961 if (gimple_assign_cast_p (def_stmt))
2962 {
2963 def = gimple_assign_rhs1 (def_stmt);
2964 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2965 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2966 oprnd0 = def;
2967 }
2968
2969 type = TREE_TYPE (lhs);
2970 vectype = get_vectype_for_scalar_type (vinfo, type);
2971 if (vectype == NULL_TREE)
2972 return NULL;
2973
2974 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2975 {
2976 /* The encoding uses one stepped pattern for each byte in the
2977 16-bit word. */
2978 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2979 for (unsigned i = 0; i < 3; ++i)
2980 for (unsigned j = 0; j < 2; ++j)
2981 elts.quick_push ((i + 1) * 2 - j - 1);
2982
2983 vec_perm_indices indices (elts, 1,
2984 TYPE_VECTOR_SUBPARTS (char_vectype));
2985 machine_mode vmode = TYPE_MODE (char_vectype);
2986 if (can_vec_perm_const_p (vmode, vmode, indices))
2987 {
2988 /* vectorizable_bswap can handle the __builtin_bswap16 if we
2989 undo the argument promotion. */
2990 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2991 {
2992 def = vect_recog_temp_ssa_var (type, NULL);
2993 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2994 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2995 oprnd0 = def;
2996 }
2997
2998 /* Pattern detected. */
2999 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3000
3001 *type_out = vectype;
3002
3003 /* Pattern supported. Create a stmt to be used to replace the
3004 pattern, with the unpromoted argument. */
3005 var = vect_recog_temp_ssa_var (type, NULL);
3006 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
3007 1, oprnd0);
3008 gimple_call_set_lhs (pattern_stmt, var);
3009 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
3010 gimple_call_fntype (last_stmt));
3011 return pattern_stmt;
3012 }
3013 }
3014
3015 oprnd1 = build_int_cst (integer_type_node, 8);
3016 rhs_code = LROTATE_EXPR;
3017 bswap16_p = true;
3018 }
3019 else
3020 return NULL;
3021
3022 if (TREE_CODE (oprnd0) != SSA_NAME
3023 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
3024 || !INTEGRAL_TYPE_P (type))
3025 return NULL;
3026
3027 stmt_vec_info def_stmt_info;
3028 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
3029 return NULL;
3030
3031 if (dt != vect_internal_def
3032 && dt != vect_constant_def
3033 && dt != vect_external_def)
3034 return NULL;
3035
3036 vectype = get_vectype_for_scalar_type (vinfo, type);
3037 if (vectype == NULL_TREE)
3038 return NULL;
3039
3040 /* If vector/vector or vector/scalar rotate is supported by the target,
3041 don't do anything here. */
3042 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
3043 if (optab1
3044 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3045 {
3046 use_rotate:
3047 if (bswap16_p)
3048 {
3049 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3050 {
3051 def = vect_recog_temp_ssa_var (type, NULL);
3052 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3053 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3054 oprnd0 = def;
3055 }
3056
3057 /* Pattern detected. */
3058 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3059
3060 *type_out = vectype;
3061
3062 /* Pattern supported. Create a stmt to be used to replace the
3063 pattern. */
3064 var = vect_recog_temp_ssa_var (type, NULL);
3065 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
3066 oprnd1);
3067 return pattern_stmt;
3068 }
3069 return NULL;
3070 }
3071
3072 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
3073 {
3074 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
3075 if (optab2
3076 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3077 goto use_rotate;
3078 }
3079
3080 tree utype = unsigned_type_for (type);
3081 tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
3082 if (!uvectype)
3083 return NULL;
3084
3085 /* If vector/vector or vector/scalar shifts aren't supported by the target,
3086 don't do anything here either. */
3087 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
3088 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
3089 if (!optab1
3090 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3091 || !optab2
3092 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3093 {
3094 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
3095 return NULL;
3096 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
3097 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
3098 if (!optab1
3099 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3100 || !optab2
3101 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3102 return NULL;
3103 }
3104
3105 *type_out = vectype;
3106
3107 if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
3108 {
3109 def = vect_recog_temp_ssa_var (utype, NULL);
3110 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3111 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3112 oprnd0 = def;
3113 }
3114
3115 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
3116 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
3117
3118 def = NULL_TREE;
3119 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
3120 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
3121 def = oprnd1;
3122 else if (def_stmt && gimple_assign_cast_p (def_stmt))
3123 {
3124 tree rhs1 = gimple_assign_rhs1 (def_stmt);
3125 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
3126 && TYPE_PRECISION (TREE_TYPE (rhs1))
3127 == TYPE_PRECISION (type))
3128 def = rhs1;
3129 }
3130
3131 if (def == NULL_TREE)
3132 {
3133 def = vect_recog_temp_ssa_var (utype, NULL);
3134 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
3135 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3136 }
3137 stype = TREE_TYPE (def);
3138
3139 if (TREE_CODE (def) == INTEGER_CST)
3140 {
3141 if (!tree_fits_uhwi_p (def)
3142 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
3143 || integer_zerop (def))
3144 return NULL;
3145 def2 = build_int_cst (stype,
3146 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
3147 }
3148 else
3149 {
3150 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
3151
3152 if (vecstype == NULL_TREE)
3153 return NULL;
3154 def2 = vect_recog_temp_ssa_var (stype, NULL);
3155 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
3156 if (ext_def)
3157 {
3158 basic_block new_bb
3159 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3160 gcc_assert (!new_bb);
3161 }
3162 else
3163 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3164
3165 def2 = vect_recog_temp_ssa_var (stype, NULL);
3166 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
3167 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
3168 gimple_assign_lhs (def_stmt), mask);
3169 if (ext_def)
3170 {
3171 basic_block new_bb
3172 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3173 gcc_assert (!new_bb);
3174 }
3175 else
3176 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3177 }
3178
3179 var1 = vect_recog_temp_ssa_var (utype, NULL);
3180 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
3181 ? LSHIFT_EXPR : RSHIFT_EXPR,
3182 oprnd0, def);
3183 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3184
3185 var2 = vect_recog_temp_ssa_var (utype, NULL);
3186 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
3187 ? RSHIFT_EXPR : LSHIFT_EXPR,
3188 oprnd0, def2);
3189 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3190
3191 /* Pattern detected. */
3192 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3193
3194 /* Pattern supported. Create a stmt to be used to replace the pattern. */
3195 var = vect_recog_temp_ssa_var (utype, NULL);
3196 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
3197
3198 if (!useless_type_conversion_p (type, utype))
3199 {
3200 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
3201 tree result = vect_recog_temp_ssa_var (type, NULL);
3202 pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
3203 }
3204 return pattern_stmt;
3205 }
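
/* A worked (hypothetical) example of the expansion documented above, for a
   32-bit unsigned rotate by a variable amount when only vector shifts are
   available:

     a = b r<< c;

   becomes, with B == 32,

     d = -c;
     e = d & 31;                      // B - 1, keeps the shift amount in range
     f = b << c;
     g = b >> e;
     a = f | g;  */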
3206
3207 /* Detect a vector by vector shift pattern that wouldn't be otherwise
3208 vectorized:
3209
3210 type a_t;
3211 TYPE b_T, res_T;
3212
3213 S1 a_t = ;
3214 S2 b_T = ;
3215 S3 res_T = b_T op a_t;
3216
3217 where type 'TYPE' is a type with a different size than 'type',
3218 and op is <<, >> or rotate.
3219
3220 Also detect cases:
3221
3222 type a_t;
3223 TYPE b_T, c_T, res_T;
3224
3225 S0 c_T = ;
3226 S1 a_t = (type) c_T;
3227 S2 b_T = ;
3228 S3 res_T = b_T op a_t;
3229
3230 Input/Output:
3231
3232 * STMT_VINFO: The stmt from which the pattern search begins,
3233 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
3234 with a shift/rotate which has same type on both operands, in the
3235 second case just b_T op c_T, in the first case with added cast
3236 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
3237
3238 Output:
3239
3240 * TYPE_OUT: The type of the output of this pattern.
3241
3242 * Return value: A new stmt that will be used to replace the shift/rotate
3243 S3 stmt. */
3244
3245 static gimple *
3246 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
3247 stmt_vec_info stmt_vinfo,
3248 tree *type_out)
3249 {
3250 gimple *last_stmt = stmt_vinfo->stmt;
3251 tree oprnd0, oprnd1, lhs, var;
3252 gimple *pattern_stmt;
3253 enum tree_code rhs_code;
3254
3255 if (!is_gimple_assign (last_stmt))
3256 return NULL;
3257
3258 rhs_code = gimple_assign_rhs_code (last_stmt);
3259 switch (rhs_code)
3260 {
3261 case LSHIFT_EXPR:
3262 case RSHIFT_EXPR:
3263 case LROTATE_EXPR:
3264 case RROTATE_EXPR:
3265 break;
3266 default:
3267 return NULL;
3268 }
3269
3270 lhs = gimple_assign_lhs (last_stmt);
3271 oprnd0 = gimple_assign_rhs1 (last_stmt);
3272 oprnd1 = gimple_assign_rhs2 (last_stmt);
3273 if (TREE_CODE (oprnd0) != SSA_NAME
3274 || TREE_CODE (oprnd1) != SSA_NAME
3275 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
3276 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
3277 || TYPE_PRECISION (TREE_TYPE (lhs))
3278 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
3279 return NULL;
3280
3281 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
3282 if (!def_vinfo)
3283 return NULL;
3284
3285 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
3286 if (*type_out == NULL_TREE)
3287 return NULL;
3288
3289 tree def = NULL_TREE;
3290 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
3291 if (def_stmt && gimple_assign_cast_p (def_stmt))
3292 {
3293 tree rhs1 = gimple_assign_rhs1 (def_stmt);
3294 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
3295 && TYPE_PRECISION (TREE_TYPE (rhs1))
3296 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
3297 {
3298 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
3299 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
3300 def = rhs1;
3301 else
3302 {
3303 tree mask
3304 = build_low_bits_mask (TREE_TYPE (rhs1),
3305 TYPE_PRECISION (TREE_TYPE (oprnd1)));
3306 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
3307 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
3308 tree vecstype = get_vectype_for_scalar_type (vinfo,
3309 TREE_TYPE (rhs1));
3310 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3311 }
3312 }
3313 }
3314
3315 if (def == NULL_TREE)
3316 {
3317 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
3318 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
3319 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3320 }
3321
3322 /* Pattern detected. */
3323 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
3324
3325 /* Pattern supported. Create a stmt to be used to replace the pattern. */
3326 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
3327 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
3328
3329 return pattern_stmt;
3330 }
3331
3332 /* Return true iff the target has a vector optab implementing the operation
3333 CODE on type VECTYPE. */
3334
3335 static bool
3336 target_has_vecop_for_code (tree_code code, tree vectype)
3337 {
3338 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
3339 return voptab
3340 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
3341 }
3342
3343 /* Verify that the target has optabs of VECTYPE to perform all the steps
3344 needed by the multiplication-by-immediate synthesis algorithm described by
3345 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
3346 present. Return true iff the target supports all the steps. */
3347
3348 static bool
3349 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
3350 tree vectype, bool synth_shift_p)
3351 {
3352 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
3353 return false;
3354
3355 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
3356 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
3357
3358 if (var == negate_variant
3359 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
3360 return false;
3361
3362 /* If we must synthesize shifts with additions make sure that vector
3363 addition is available. */
3364 if ((var == add_variant || synth_shift_p) && !supports_vplus)
3365 return false;
3366
3367 for (int i = 1; i < alg->ops; i++)
3368 {
3369 switch (alg->op[i])
3370 {
3371 case alg_shift:
3372 break;
3373 case alg_add_t_m2:
3374 case alg_add_t2_m:
3375 case alg_add_factor:
3376 if (!supports_vplus)
3377 return false;
3378 break;
3379 case alg_sub_t_m2:
3380 case alg_sub_t2_m:
3381 case alg_sub_factor:
3382 if (!supports_vminus)
3383 return false;
3384 break;
3385 case alg_unknown:
3386 case alg_m:
3387 case alg_zero:
3388 case alg_impossible:
3389 return false;
3390 default:
3391 gcc_unreachable ();
3392 }
3393 }
3394
3395 return true;
3396 }
3397
3398 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
3399 putting the final result in DEST. Append all statements but the last to
3400 STMT_INFO's pattern def sequence. Return the last statement. */
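/* A small worked example of what this helper emits: for AMNT == 3 the chain
   of self-additions

     t1 = op + op;       -- op * 2
     t2 = t1 + t1;       -- op * 4
     dest = t2 + t2;     -- op * 8 == op << 3

   computes OP << 3; only the final statement is returned, the earlier ones
   are appended to the pattern def sequence.  */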
3401
3402 static gimple *
3403 synth_lshift_by_additions (vec_info *vinfo,
3404 tree dest, tree op, HOST_WIDE_INT amnt,
3405 stmt_vec_info stmt_info)
3406 {
3407 HOST_WIDE_INT i;
3408 tree itype = TREE_TYPE (op);
3409 tree prev_res = op;
3410 gcc_assert (amnt >= 0);
3411 for (i = 0; i < amnt; i++)
3412 {
3413 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
3414 : dest;
3415 gimple *stmt
3416 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
3417 prev_res = tmp_var;
3418 if (i < amnt - 1)
3419 append_pattern_def_seq (vinfo, stmt_info, stmt);
3420 else
3421 return stmt;
3422 }
3423 gcc_unreachable ();
3424 return NULL;
3425 }
3426
3427 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
3428 CODE to operands OP1 and OP2, creating a new temporary SSA var in
3429 the process if necessary. Append the resulting assignment statements
3430 to the sequence in STMT_VINFO. Return the SSA variable that holds the
3431 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
3432 left shifts using additions. */
3433
3434 static tree
3435 apply_binop_and_append_stmt (vec_info *vinfo,
3436 tree_code code, tree op1, tree op2,
3437 stmt_vec_info stmt_vinfo, bool synth_shift_p)
3438 {
3439 if (integer_zerop (op2)
3440 && (code == LSHIFT_EXPR
3441 || code == PLUS_EXPR))
3442 {
3443 gcc_assert (TREE_CODE (op1) == SSA_NAME);
3444 return op1;
3445 }
3446
3447 gimple *stmt;
3448 tree itype = TREE_TYPE (op1);
3449 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
3450
3451 if (code == LSHIFT_EXPR
3452 && synth_shift_p)
3453 {
3454 stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
3455 TREE_INT_CST_LOW (op2), stmt_vinfo);
3456 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3457 return tmp_var;
3458 }
3459
3460 stmt = gimple_build_assign (tmp_var, code, op1, op2);
3461 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3462 return tmp_var;
3463 }
3464
3465 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
3466 and simple arithmetic operations to be vectorized. Record the statements
3467 produced in STMT_VINFO and return the last statement in the sequence or
3468 NULL if it's not possible to synthesize such a multiplication.
3469 This function mirrors the behavior of expand_mult_const in expmed.cc but
3470 works on tree-ssa form. */
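/* A worked example of the kind of decomposition emitted below: a
   multiplication by 10 can be synthesized (one possible variant) as

     t1 = op << 2;        -- op * 4
     t2 = t1 + op;        -- op * 5
     result = t2 << 1;    -- op * 10

   roughly an alg_shift, alg_add_t_m2, alg_shift sequence, using only shifts
   and additions, performed on an unsigned copy of the operand when signed
   overflow could otherwise be introduced.  */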
3471
3472 static gimple *
3473 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
3474 stmt_vec_info stmt_vinfo)
3475 {
3476 tree itype = TREE_TYPE (op);
3477 machine_mode mode = TYPE_MODE (itype);
3478 struct algorithm alg;
3479 mult_variant variant;
3480 if (!tree_fits_shwi_p (val))
3481 return NULL;
3482
3483 /* Multiplication synthesis by shifts, adds and subs can introduce
3484 signed overflow where the original operation didn't. Perform the
3485 operations on an unsigned type and cast back to avoid this.
3486 In the future we may want to relax this for synthesis algorithms
3487 that we can prove do not cause unexpected overflow. */
3488 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
3489
3490 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
3491 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
3492 if (!vectype)
3493 return NULL;
3494
3495 /* Targets that don't support vector shifts but support vector additions
3496 can synthesize shifts that way. */
3497 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
3498
3499 HOST_WIDE_INT hwval = tree_to_shwi (val);
3500 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
3501 The vectorizer's benefit analysis will decide whether it's beneficial
3502 to do this. */
3503 bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
3504 ? TYPE_MODE (vectype) : mode,
3505 hwval, &alg, &variant, MAX_COST);
3506 if (!possible)
3507 return NULL;
3508
3509 if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
3510 return NULL;
3511
3512 tree accumulator;
3513
3514 /* Clear out the sequence of statements so we can populate it below. */
3515 gimple *stmt = NULL;
3516
3517 if (cast_to_unsigned_p)
3518 {
3519 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
3520 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
3521 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3522 op = tmp_op;
3523 }
3524
3525 if (alg.op[0] == alg_zero)
3526 accumulator = build_int_cst (multtype, 0);
3527 else
3528 accumulator = op;
3529
3530 bool needs_fixup = (variant == negate_variant)
3531 || (variant == add_variant);
3532
3533 for (int i = 1; i < alg.ops; i++)
3534 {
3535 tree shft_log = build_int_cst (multtype, alg.log[i]);
3536 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3537 tree tmp_var = NULL_TREE;
3538
3539 switch (alg.op[i])
3540 {
3541 case alg_shift:
3542 if (synth_shift_p)
3543 stmt
3544 = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
3545 alg.log[i], stmt_vinfo);
3546 else
3547 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
3548 shft_log);
3549 break;
3550 case alg_add_t_m2:
3551 tmp_var
3552 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
3553 stmt_vinfo, synth_shift_p);
3554 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3555 tmp_var);
3556 break;
3557 case alg_sub_t_m2:
3558 tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
3559 shft_log, stmt_vinfo,
3560 synth_shift_p);
3561 /* In some algorithms the first step involves zeroing the
3562 accumulator. If subtracting from such an accumulator,
3563 just emit the negation directly. */
3564 if (integer_zerop (accumulator))
3565 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
3566 else
3567 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
3568 tmp_var);
3569 break;
3570 case alg_add_t2_m:
3571 tmp_var
3572 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3573 shft_log, stmt_vinfo, synth_shift_p);
3574 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
3575 break;
3576 case alg_sub_t2_m:
3577 tmp_var
3578 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3579 shft_log, stmt_vinfo, synth_shift_p);
3580 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
3581 break;
3582 case alg_add_factor:
3583 tmp_var
3584 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3585 shft_log, stmt_vinfo, synth_shift_p);
3586 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3587 tmp_var);
3588 break;
3589 case alg_sub_factor:
3590 tmp_var
3591 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3592 shft_log, stmt_vinfo, synth_shift_p);
3593 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
3594 accumulator);
3595 break;
3596 default:
3597 gcc_unreachable ();
3598 }
3599 /* We don't want to append the last stmt in the sequence to stmt_vinfo
3600 but rather return it directly. */
3601
3602 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
3603 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3604 accumulator = accum_tmp;
3605 }
3606 if (variant == negate_variant)
3607 {
3608 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3609 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
3610 accumulator = accum_tmp;
3611 if (cast_to_unsigned_p)
3612 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3613 }
3614 else if (variant == add_variant)
3615 {
3616 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3617 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
3618 accumulator = accum_tmp;
3619 if (cast_to_unsigned_p)
3620 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3621 }
3622 /* Move back to a signed type if needed. */
3623 if (cast_to_unsigned_p)
3624 {
3625 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
3626 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
3627 }
3628
3629 return stmt;
3630 }
3631
3632 /* Detect multiplication by constant and convert it into a sequence of
3633 shifts and additions, subtractions, negations. We reuse the
3634 choose_mult_variant algorithms from expmed.cc
3635
3636 Input/Output:
3637
3638 STMT_VINFO: The stmt from which the pattern search begins,
3639 i.e. the mult stmt.
3640
3641 Output:
3642
3643 * TYPE_OUT: The type of the output of this pattern.
3644
3645 * Return value: A new stmt that will be used to replace
3646 the multiplication. */
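/* For illustration, a loop this pattern targets when the target has no
   vector multiply instruction could be (names are arbitrary):

     void
     scale (int *a, const int *b, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] = b[i] * 9;
     }

   where the multiplication by 9 would be synthesized as roughly
   (b[i] << 3) + b[i].  */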
3647
3648 static gimple *
3649 vect_recog_mult_pattern (vec_info *vinfo,
3650 stmt_vec_info stmt_vinfo, tree *type_out)
3651 {
3652 gimple *last_stmt = stmt_vinfo->stmt;
3653 tree oprnd0, oprnd1, vectype, itype;
3654 gimple *pattern_stmt;
3655
3656 if (!is_gimple_assign (last_stmt))
3657 return NULL;
3658
3659 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
3660 return NULL;
3661
3662 oprnd0 = gimple_assign_rhs1 (last_stmt);
3663 oprnd1 = gimple_assign_rhs2 (last_stmt);
3664 itype = TREE_TYPE (oprnd0);
3665
3666 if (TREE_CODE (oprnd0) != SSA_NAME
3667 || TREE_CODE (oprnd1) != INTEGER_CST
3668 || !INTEGRAL_TYPE_P (itype)
3669 || !type_has_mode_precision_p (itype))
3670 return NULL;
3671
3672 vectype = get_vectype_for_scalar_type (vinfo, itype);
3673 if (vectype == NULL_TREE)
3674 return NULL;
3675
3676 /* If the target can handle vectorized multiplication natively,
3677 don't attempt to optimize this. */
3678 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
3679 if (mul_optab != unknown_optab)
3680 {
3681 machine_mode vec_mode = TYPE_MODE (vectype);
3682 int icode = (int) optab_handler (mul_optab, vec_mode);
3683 if (icode != CODE_FOR_nothing)
3684 return NULL;
3685 }
3686
3687 pattern_stmt = vect_synth_mult_by_constant (vinfo,
3688 oprnd0, oprnd1, stmt_vinfo);
3689 if (!pattern_stmt)
3690 return NULL;
3691
3692 /* Pattern detected. */
3693 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
3694
3695 *type_out = vectype;
3696
3697 return pattern_stmt;
3698 }
3699
3700 /* Detect a signed division by a constant that wouldn't be
3701 otherwise vectorized:
3702
3703 type a_t, b_t;
3704
3705 S1 a_t = b_t / N;
3706
3707 where type 'type' is an integral type and N is a constant.
3708
3709 Similarly handle modulo by a constant:
3710
3711 S4 a_t = b_t % N;
3712
3713 Input/Output:
3714
3715 * STMT_VINFO: The stmt from which the pattern search begins,
3716 i.e. the division stmt. S1 is replaced, if N is a power
3717 of two constant and type is signed, by:
3718 S3 y_t = b_t < 0 ? N - 1 : 0;
3719 S2 x_t = b_t + y_t;
3720 S1' a_t = x_t >> log2 (N);
3721
3722 S4 is replaced if N is a power of two constant and
3723 type is signed by (where *_T temporaries have unsigned type):
3724 S9 y_T = b_t < 0 ? -1U : 0U;
3725 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3726 S7 z_t = (type) z_T;
3727 S6 w_t = b_t + z_t;
3728 S5 x_t = w_t & (N - 1);
3729 S4' a_t = x_t - z_t;
3730
3731 Output:
3732
3733 * TYPE_OUT: The type of the output of this pattern.
3734
3735 * Return value: A new stmt that will be used to replace the division
3736 S1 or modulo S4 stmt. */
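/* A concrete instance of the power-of-two rewrite above, for N == 4 and a
   signed b_t (scalar sketch, not the emitted IL):

     y_t = b_t < 0 ? 3 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 2;

   e.g. b_t == -7 gives (-7 + 3) >> 2 == -1, matching C's truncation toward
   zero, whereas a plain arithmetic shift of -7 would yield -2.  */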
3737
3738 static gimple *
3739 vect_recog_divmod_pattern (vec_info *vinfo,
3740 stmt_vec_info stmt_vinfo, tree *type_out)
3741 {
3742 gimple *last_stmt = stmt_vinfo->stmt;
3743 tree oprnd0, oprnd1, vectype, itype, cond;
3744 gimple *pattern_stmt, *def_stmt;
3745 enum tree_code rhs_code;
3746 optab optab;
3747 tree q;
3748 int dummy_int, prec;
3749
3750 if (!is_gimple_assign (last_stmt))
3751 return NULL;
3752
3753 rhs_code = gimple_assign_rhs_code (last_stmt);
3754 switch (rhs_code)
3755 {
3756 case TRUNC_DIV_EXPR:
3757 case EXACT_DIV_EXPR:
3758 case TRUNC_MOD_EXPR:
3759 break;
3760 default:
3761 return NULL;
3762 }
3763
3764 oprnd0 = gimple_assign_rhs1 (last_stmt);
3765 oprnd1 = gimple_assign_rhs2 (last_stmt);
3766 itype = TREE_TYPE (oprnd0);
3767 if (TREE_CODE (oprnd0) != SSA_NAME
3768 || TREE_CODE (oprnd1) != INTEGER_CST
3769 || TREE_CODE (itype) != INTEGER_TYPE
3770 || !type_has_mode_precision_p (itype))
3771 return NULL;
3772
3773 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3774 vectype = get_vectype_for_scalar_type (vinfo, itype);
3775 if (vectype == NULL_TREE)
3776 return NULL;
3777
3778 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3779 {
3780 /* If the target can handle vectorized division or modulo natively,
3781 don't attempt to optimize this, since native division is likely
3782 to give smaller code. */
3783 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3784 if (optab != unknown_optab)
3785 {
3786 machine_mode vec_mode = TYPE_MODE (vectype);
3787 int icode = (int) optab_handler (optab, vec_mode);
3788 if (icode != CODE_FOR_nothing)
3789 return NULL;
3790 }
3791 }
3792
3793 prec = TYPE_PRECISION (itype);
3794 if (integer_pow2p (oprnd1))
3795 {
3796 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3797 return NULL;
3798
3799 /* Pattern detected. */
3800 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3801
3802 *type_out = vectype;
3803
3804 /* Check if the target supports this internal function. */
3805 internal_fn ifn = IFN_DIV_POW2;
3806 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3807 {
3808 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3809
3810 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3811 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3812 gimple_call_set_lhs (div_stmt, var_div);
3813
3814 if (rhs_code == TRUNC_MOD_EXPR)
3815 {
3816 append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
3817 def_stmt
3818 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3819 LSHIFT_EXPR, var_div, shift);
3820 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3821 pattern_stmt
3822 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3823 MINUS_EXPR, oprnd0,
3824 gimple_assign_lhs (def_stmt));
3825 }
3826 else
3827 pattern_stmt = div_stmt;
3828 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3829
3830 return pattern_stmt;
3831 }
3832
3833 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3834 build_int_cst (itype, 0));
3835 if (rhs_code == TRUNC_DIV_EXPR
3836 || rhs_code == EXACT_DIV_EXPR)
3837 {
3838 tree var = vect_recog_temp_ssa_var (itype, NULL);
3839 tree shift;
3840 def_stmt
3841 = gimple_build_assign (var, COND_EXPR, cond,
3842 fold_build2 (MINUS_EXPR, itype, oprnd1,
3843 build_int_cst (itype, 1)),
3844 build_int_cst (itype, 0));
3845 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3846 var = vect_recog_temp_ssa_var (itype, NULL);
3847 def_stmt
3848 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3849 gimple_assign_lhs (def_stmt));
3850 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3851
3852 shift = build_int_cst (itype, tree_log2 (oprnd1));
3853 pattern_stmt
3854 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3855 RSHIFT_EXPR, var, shift);
3856 }
3857 else
3858 {
3859 tree signmask;
3860 if (compare_tree_int (oprnd1, 2) == 0)
3861 {
3862 signmask = vect_recog_temp_ssa_var (itype, NULL);
3863 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3864 build_int_cst (itype, 1),
3865 build_int_cst (itype, 0));
3866 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3867 }
3868 else
3869 {
3870 tree utype
3871 = build_nonstandard_integer_type (prec, 1);
3872 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3873 tree shift
3874 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3875 - tree_log2 (oprnd1));
3876 tree var = vect_recog_temp_ssa_var (utype, NULL);
3877
3878 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3879 build_int_cst (utype, -1),
3880 build_int_cst (utype, 0));
3881 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3882 var = vect_recog_temp_ssa_var (utype, NULL);
3883 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3884 gimple_assign_lhs (def_stmt),
3885 shift);
3886 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3887 signmask = vect_recog_temp_ssa_var (itype, NULL);
3888 def_stmt
3889 = gimple_build_assign (signmask, NOP_EXPR, var);
3890 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3891 }
3892 def_stmt
3893 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3894 PLUS_EXPR, oprnd0, signmask);
3895 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3896 def_stmt
3897 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3898 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3899 fold_build2 (MINUS_EXPR, itype, oprnd1,
3900 build_int_cst (itype, 1)));
3901 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3902
3903 pattern_stmt
3904 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3905 MINUS_EXPR, gimple_assign_lhs (def_stmt),
3906 signmask);
3907 }
3908
3909 return pattern_stmt;
3910 }
3911
3912 if (prec > HOST_BITS_PER_WIDE_INT
3913 || integer_zerop (oprnd1))
3914 return NULL;
3915
3916 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3917 return NULL;
3918
3919 if (TYPE_UNSIGNED (itype))
3920 {
3921 unsigned HOST_WIDE_INT mh, ml;
3922 int pre_shift, post_shift;
3923 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3924 & GET_MODE_MASK (itype_mode));
3925 tree t1, t2, t3, t4;
3926
3927 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3928 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
3929 return NULL;
3930
3931 /* Find a suitable multiplier and right shift count
3932 instead of multiplying by D. */
3933 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3934
3935 /* If the suggested multiplier is more than SIZE bits, we can do better
3936 for even divisors, using an initial right shift. */
3937 if (mh != 0 && (d & 1) == 0)
3938 {
3939 pre_shift = ctz_or_zero (d);
3940 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3941 &ml, &post_shift, &dummy_int);
3942 gcc_assert (!mh);
3943 }
3944 else
3945 pre_shift = 0;
3946
3947 if (mh != 0)
3948 {
3949 if (post_shift - 1 >= prec)
3950 return NULL;
3951
3952 /* t1 = oprnd0 h* ml;
3953 t2 = oprnd0 - t1;
3954 t3 = t2 >> 1;
3955 t4 = t1 + t3;
3956 q = t4 >> (post_shift - 1); */
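          /* For example, with prec == 32 and d == 7, choose_multiplier yields
             a 33-bit multiplier, so mh != 0, ml == 0x24924925 and
             post_shift == 3, and the sequence amounts to

               q = (t1 + ((oprnd0 - t1) >> 1)) >> 2,  t1 = oprnd0 h* ml

             (constants shown purely as an illustration).  */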
3957 t1 = vect_recog_temp_ssa_var (itype, NULL);
3958 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3959 build_int_cst (itype, ml));
3960 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3961
3962 t2 = vect_recog_temp_ssa_var (itype, NULL);
3963 def_stmt
3964 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3965 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3966
3967 t3 = vect_recog_temp_ssa_var (itype, NULL);
3968 def_stmt
3969 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3970 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3971
3972 t4 = vect_recog_temp_ssa_var (itype, NULL);
3973 def_stmt
3974 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3975
3976 if (post_shift != 1)
3977 {
3978 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3979
3980 q = vect_recog_temp_ssa_var (itype, NULL);
3981 pattern_stmt
3982 = gimple_build_assign (q, RSHIFT_EXPR, t4,
3983 build_int_cst (itype, post_shift - 1));
3984 }
3985 else
3986 {
3987 q = t4;
3988 pattern_stmt = def_stmt;
3989 }
3990 }
3991 else
3992 {
3993 if (pre_shift >= prec || post_shift >= prec)
3994 return NULL;
3995
3996 /* t1 = oprnd0 >> pre_shift;
3997 t2 = t1 h* ml;
3998 q = t2 >> post_shift; */
3999 if (pre_shift)
4000 {
4001 t1 = vect_recog_temp_ssa_var (itype, NULL);
4002 def_stmt
4003 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
4004 build_int_cst (NULL, pre_shift));
4005 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4006 }
4007 else
4008 t1 = oprnd0;
4009
4010 t2 = vect_recog_temp_ssa_var (itype, NULL);
4011 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
4012 build_int_cst (itype, ml));
4013
4014 if (post_shift)
4015 {
4016 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4017
4018 q = vect_recog_temp_ssa_var (itype, NULL);
4019 def_stmt
4020 = gimple_build_assign (q, RSHIFT_EXPR, t2,
4021 build_int_cst (itype, post_shift));
4022 }
4023 else
4024 q = t2;
4025
4026 pattern_stmt = def_stmt;
4027 }
4028 }
4029 else
4030 {
4031 unsigned HOST_WIDE_INT ml;
4032 int post_shift;
4033 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
4034 unsigned HOST_WIDE_INT abs_d;
4035 bool add = false;
4036 tree t1, t2, t3, t4;
4037
4038 /* Give up for -1. */
4039 if (d == -1)
4040 return NULL;
4041
4042 /* Since d might be INT_MIN, we have to cast to
4043 unsigned HOST_WIDE_INT before negating to avoid
4044 undefined signed overflow. */
4045 abs_d = (d >= 0
4046 ? (unsigned HOST_WIDE_INT) d
4047 : - (unsigned HOST_WIDE_INT) d);
4048
4049 /* n rem d = n rem -d */
4050 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
4051 {
4052 d = abs_d;
4053 oprnd1 = build_int_cst (itype, abs_d);
4054 }
4055 if (HOST_BITS_PER_WIDE_INT >= prec
4056 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
4057 /* This case is not handled correctly below. */
4058 return NULL;
4059
4060 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
4061 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
4062 {
4063 add = true;
4064 ml |= HOST_WIDE_INT_M1U << (prec - 1);
4065 }
4066 if (post_shift >= prec)
4067 return NULL;
4068
4069 /* t1 = oprnd0 h* ml; */
4070 t1 = vect_recog_temp_ssa_var (itype, NULL);
4071 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
4072 build_int_cst (itype, ml));
4073
4074 if (add)
4075 {
4076 /* t2 = t1 + oprnd0; */
4077 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4078 t2 = vect_recog_temp_ssa_var (itype, NULL);
4079 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
4080 }
4081 else
4082 t2 = t1;
4083
4084 if (post_shift)
4085 {
4086 /* t3 = t2 >> post_shift; */
4087 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4088 t3 = vect_recog_temp_ssa_var (itype, NULL);
4089 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
4090 build_int_cst (itype, post_shift));
4091 }
4092 else
4093 t3 = t2;
4094
4095 int msb = 1;
4096 value_range r;
4097 get_range_query (cfun)->range_of_expr (r, oprnd0);
4098 if (r.kind () == VR_RANGE)
4099 {
4100 if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
4101 msb = 0;
4102 else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
4103 msb = -1;
4104 }
4105
4106 if (msb == 0 && d >= 0)
4107 {
4108 /* q = t3; */
4109 q = t3;
4110 pattern_stmt = def_stmt;
4111 }
4112 else
4113 {
4114 /* t4 = oprnd0 >> (prec - 1);
4115 or if we know from VRP that oprnd0 >= 0
4116 t4 = 0;
4117 or if we know from VRP that oprnd0 < 0
4118 t4 = -1; */
4119 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4120 t4 = vect_recog_temp_ssa_var (itype, NULL);
4121 if (msb != 1)
4122 def_stmt = gimple_build_assign (t4, INTEGER_CST,
4123 build_int_cst (itype, msb));
4124 else
4125 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
4126 build_int_cst (itype, prec - 1));
4127 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4128
4129 /* q = t3 - t4; or q = t4 - t3; */
4130 q = vect_recog_temp_ssa_var (itype, NULL);
4131 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
4132 d < 0 ? t3 : t4);
4133 }
4134 }
4135
4136 if (rhs_code == TRUNC_MOD_EXPR)
4137 {
4138 tree r, t1;
4139
4140 /* We divided. Now finish by:
4141 t1 = q * oprnd1;
4142 r = oprnd0 - t1; */
4143 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
4144
4145 t1 = vect_recog_temp_ssa_var (itype, NULL);
4146 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
4147 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4148
4149 r = vect_recog_temp_ssa_var (itype, NULL);
4150 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
4151 }
4152
4153 /* Pattern detected. */
4154 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
4155
4156 *type_out = vectype;
4157 return pattern_stmt;
4158 }
4159
4160 /* Function vect_recog_mixed_size_cond_pattern
4161
4162 Try to find the following pattern:
4163
4164 type x_t, y_t;
4165 TYPE a_T, b_T, c_T;
4166 loop:
4167 S1 a_T = x_t CMP y_t ? b_T : c_T;
4168
4169 where type 'TYPE' is an integral type which has different size
4170 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
4171 than 'type', the constants need to fit into an integer type
4172 with the same width as 'type') or results of conversion from 'type'.
4173
4174 Input:
4175
4176 * STMT_VINFO: The stmt from which the pattern search begins.
4177
4178 Output:
4179
4180 * TYPE_OUT: The type of the output of this pattern.
4181
4182 * Return value: A new stmt that will be used to replace the pattern.
4183 Additionally a def_stmt is added.
4184
4185 a_it = x_t CMP y_t ? b_it : c_it;
4186 a_T = (TYPE) a_it; */
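/* For illustration, source of this shape, with a wide comparison feeding a
   narrow result, could be (types and names chosen for the example):

     void
     f (signed char *out, const long long *x, const long long *y, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = x[i] < y[i] ? 1 : 2;
     }

   The COND_EXPR is then performed in an intermediate type matching the
   width of the comparison and only afterwards narrowed to the result
   type.  */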
4187
4188 static gimple *
4189 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
4190 stmt_vec_info stmt_vinfo, tree *type_out)
4191 {
4192 gimple *last_stmt = stmt_vinfo->stmt;
4193 tree cond_expr, then_clause, else_clause;
4194 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
4195 gimple *pattern_stmt, *def_stmt;
4196 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
4197 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
4198 bool promotion;
4199 tree comp_scalar_type;
4200
4201 if (!is_gimple_assign (last_stmt)
4202 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
4203 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
4204 return NULL;
4205
4206 cond_expr = gimple_assign_rhs1 (last_stmt);
4207 then_clause = gimple_assign_rhs2 (last_stmt);
4208 else_clause = gimple_assign_rhs3 (last_stmt);
4209
4210 if (!COMPARISON_CLASS_P (cond_expr))
4211 return NULL;
4212
4213 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
4214 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
4215 if (comp_vectype == NULL_TREE)
4216 return NULL;
4217
4218 type = TREE_TYPE (gimple_assign_lhs (last_stmt));
4219 if (types_compatible_p (type, comp_scalar_type)
4220 || ((TREE_CODE (then_clause) != INTEGER_CST
4221 || TREE_CODE (else_clause) != INTEGER_CST)
4222 && !INTEGRAL_TYPE_P (comp_scalar_type))
4223 || !INTEGRAL_TYPE_P (type))
4224 return NULL;
4225
4226 if ((TREE_CODE (then_clause) != INTEGER_CST
4227 && !type_conversion_p (vinfo, then_clause, false,
4228 &orig_type0, &def_stmt0, &promotion))
4229 || (TREE_CODE (else_clause) != INTEGER_CST
4230 && !type_conversion_p (vinfo, else_clause, false,
4231 &orig_type1, &def_stmt1, &promotion)))
4232 return NULL;
4233
4234 if (orig_type0 && orig_type1
4235 && !types_compatible_p (orig_type0, orig_type1))
4236 return NULL;
4237
4238 if (orig_type0)
4239 {
4240 if (!types_compatible_p (orig_type0, comp_scalar_type))
4241 return NULL;
4242 then_clause = gimple_assign_rhs1 (def_stmt0);
4243 itype = orig_type0;
4244 }
4245
4246 if (orig_type1)
4247 {
4248 if (!types_compatible_p (orig_type1, comp_scalar_type))
4249 return NULL;
4250 else_clause = gimple_assign_rhs1 (def_stmt1);
4251 itype = orig_type1;
4252 }
4253
4254
4255 HOST_WIDE_INT cmp_mode_size
4256 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
4257
4258 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
4259 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
4260 return NULL;
4261
4262 vectype = get_vectype_for_scalar_type (vinfo, type);
4263 if (vectype == NULL_TREE)
4264 return NULL;
4265
4266 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
4267 return NULL;
4268
4269 if (itype == NULL_TREE)
4270 itype = build_nonstandard_integer_type (cmp_mode_size,
4271 TYPE_UNSIGNED (type));
4272
4273 if (itype == NULL_TREE
4274 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
4275 return NULL;
4276
4277 vecitype = get_vectype_for_scalar_type (vinfo, itype);
4278 if (vecitype == NULL_TREE)
4279 return NULL;
4280
4281 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
4282 return NULL;
4283
4284 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
4285 {
4286 if ((TREE_CODE (then_clause) == INTEGER_CST
4287 && !int_fits_type_p (then_clause, itype))
4288 || (TREE_CODE (else_clause) == INTEGER_CST
4289 && !int_fits_type_p (else_clause, itype)))
4290 return NULL;
4291 }
4292
4293 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4294 COND_EXPR, unshare_expr (cond_expr),
4295 fold_convert (itype, then_clause),
4296 fold_convert (itype, else_clause));
4297 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
4298 NOP_EXPR, gimple_assign_lhs (def_stmt));
4299
4300 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
4301 *type_out = vectype;
4302
4303 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
4304
4305 return pattern_stmt;
4306 }
4307
4308
4309 /* Helper function of vect_recog_bool_pattern. Called recursively, return
4310 true if bool VAR can and should be optimized that way. Assume it shouldn't
4311 in case it's a result of a comparison which can be directly vectorized into
4312 a vector comparison. Fills in STMTS with all stmts visited during the
4313 walk. */
4314
4315 static bool
4316 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
4317 {
4318 tree rhs1;
4319 enum tree_code rhs_code;
4320
4321 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
4322 if (!def_stmt_info)
4323 return false;
4324
4325 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
4326 if (!def_stmt)
4327 return false;
4328
4329 if (stmts.contains (def_stmt))
4330 return true;
4331
4332 rhs1 = gimple_assign_rhs1 (def_stmt);
4333 rhs_code = gimple_assign_rhs_code (def_stmt);
4334 switch (rhs_code)
4335 {
4336 case SSA_NAME:
4337 if (! check_bool_pattern (rhs1, vinfo, stmts))
4338 return false;
4339 break;
4340
4341 CASE_CONVERT:
4342 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
4343 return false;
4344 if (! check_bool_pattern (rhs1, vinfo, stmts))
4345 return false;
4346 break;
4347
4348 case BIT_NOT_EXPR:
4349 if (! check_bool_pattern (rhs1, vinfo, stmts))
4350 return false;
4351 break;
4352
4353 case BIT_AND_EXPR:
4354 case BIT_IOR_EXPR:
4355 case BIT_XOR_EXPR:
4356 if (! check_bool_pattern (rhs1, vinfo, stmts)
4357 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
4358 return false;
4359 break;
4360
4361 default:
4362 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
4363 {
4364 tree vecitype, comp_vectype;
4365
4366 /* If the comparison can throw, then is_gimple_condexpr will be
4367 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
4368 if (stmt_could_throw_p (cfun, def_stmt))
4369 return false;
4370
4371 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
4372 if (comp_vectype == NULL_TREE)
4373 return false;
4374
4375 tree mask_type = get_mask_type_for_scalar_type (vinfo,
4376 TREE_TYPE (rhs1));
4377 if (mask_type
4378 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
4379 return false;
4380
4381 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
4382 {
4383 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
4384 tree itype
4385 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
4386 vecitype = get_vectype_for_scalar_type (vinfo, itype);
4387 if (vecitype == NULL_TREE)
4388 return false;
4389 }
4390 else
4391 vecitype = comp_vectype;
4392 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
4393 return false;
4394 }
4395 else
4396 return false;
4397 break;
4398 }
4399
4400 bool res = stmts.add (def_stmt);
4401 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
4402 gcc_assert (!res);
4403
4404 return true;
4405 }
4406
4407
4408 /* Helper function of adjust_bool_pattern. Cast the result of VAR's
4409 defining stmt (SSA_NAME_DEF_STMT of VAR) to TYPE, adding the cast stmt
4410 to STMT_INFO's pattern sequence. */
4411
4412 static tree
4413 adjust_bool_pattern_cast (vec_info *vinfo,
4414 tree type, tree var, stmt_vec_info stmt_info)
4415 {
4416 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
4417 NOP_EXPR, var);
4418 append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
4419 get_vectype_for_scalar_type (vinfo, type));
4420 return gimple_assign_lhs (cast_stmt);
4421 }
4422
4423 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
4424 VAR is an SSA_NAME that should be transformed from bool to a wider integer
4425 type, OUT_TYPE is the desired final integer type of the whole pattern.
4426 STMT_INFO is the info of the pattern root and is where pattern stmts should
4427 be associated with. DEFS is a map of pattern defs. */
4428
4429 static void
4430 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
4431 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
4432 {
4433 gimple *stmt = SSA_NAME_DEF_STMT (var);
4434 enum tree_code rhs_code, def_rhs_code;
4435 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
4436 location_t loc;
4437 gimple *pattern_stmt, *def_stmt;
4438 tree trueval = NULL_TREE;
4439
4440 rhs1 = gimple_assign_rhs1 (stmt);
4441 rhs2 = gimple_assign_rhs2 (stmt);
4442 rhs_code = gimple_assign_rhs_code (stmt);
4443 loc = gimple_location (stmt);
4444 switch (rhs_code)
4445 {
4446 case SSA_NAME:
4447 CASE_CONVERT:
4448 irhs1 = *defs.get (rhs1);
4449 itype = TREE_TYPE (irhs1);
4450 pattern_stmt
4451 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4452 SSA_NAME, irhs1);
4453 break;
4454
4455 case BIT_NOT_EXPR:
4456 irhs1 = *defs.get (rhs1);
4457 itype = TREE_TYPE (irhs1);
4458 pattern_stmt
4459 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4460 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
4461 break;
4462
4463 case BIT_AND_EXPR:
4464 /* Try to optimize x = y & (a < b ? 1 : 0); into
4465 x = (a < b ? y : 0);
4466
4467 E.g. for:
4468 bool a_b, b_b, c_b;
4469 TYPE d_T;
4470
4471 S1 a_b = x1 CMP1 y1;
4472 S2 b_b = x2 CMP2 y2;
4473 S3 c_b = a_b & b_b;
4474 S4 d_T = (TYPE) c_b;
4475
4476 we would normally emit:
4477
4478 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4479 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4480 S3' c_T = a_T & b_T;
4481 S4' d_T = c_T;
4482
4483 but we can save one stmt by using the
4484 result of one of the COND_EXPRs in the other COND_EXPR and leave
4485 BIT_AND_EXPR stmt out:
4486
4487 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4488 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4489 S4' d_T = c_T;
4490
4491 At least when VEC_COND_EXPR is implemented using masks
4492 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
4493 computes the comparison mask and ANDs it, in one case with
4494 an all-ones vector, in the other case with a vector register.
4495 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
4496 often more expensive. */
4497 def_stmt = SSA_NAME_DEF_STMT (rhs2);
4498 def_rhs_code = gimple_assign_rhs_code (def_stmt);
4499 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
4500 {
4501 irhs1 = *defs.get (rhs1);
4502 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
4503 if (TYPE_PRECISION (TREE_TYPE (irhs1))
4504 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
4505 {
4506 rhs_code = def_rhs_code;
4507 rhs1 = def_rhs1;
4508 rhs2 = gimple_assign_rhs2 (def_stmt);
4509 trueval = irhs1;
4510 goto do_compare;
4511 }
4512 else
4513 irhs2 = *defs.get (rhs2);
4514 goto and_ior_xor;
4515 }
4516 def_stmt = SSA_NAME_DEF_STMT (rhs1);
4517 def_rhs_code = gimple_assign_rhs_code (def_stmt);
4518 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
4519 {
4520 irhs2 = *defs.get (rhs2);
4521 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
4522 if (TYPE_PRECISION (TREE_TYPE (irhs2))
4523 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
4524 {
4525 rhs_code = def_rhs_code;
4526 rhs1 = def_rhs1;
4527 rhs2 = gimple_assign_rhs2 (def_stmt);
4528 trueval = irhs2;
4529 goto do_compare;
4530 }
4531 else
4532 irhs1 = *defs.get (rhs1);
4533 goto and_ior_xor;
4534 }
4535 /* FALLTHRU */
4536 case BIT_IOR_EXPR:
4537 case BIT_XOR_EXPR:
4538 irhs1 = *defs.get (rhs1);
4539 irhs2 = *defs.get (rhs2);
4540 and_ior_xor:
4541 if (TYPE_PRECISION (TREE_TYPE (irhs1))
4542 != TYPE_PRECISION (TREE_TYPE (irhs2)))
4543 {
4544 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
4545 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
4546 int out_prec = TYPE_PRECISION (out_type);
4547 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
4548 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
4549 stmt_info);
4550 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
4551 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
4552 stmt_info);
4553 else
4554 {
4555 irhs1 = adjust_bool_pattern_cast (vinfo,
4556 out_type, irhs1, stmt_info);
4557 irhs2 = adjust_bool_pattern_cast (vinfo,
4558 out_type, irhs2, stmt_info);
4559 }
4560 }
4561 itype = TREE_TYPE (irhs1);
4562 pattern_stmt
4563 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4564 rhs_code, irhs1, irhs2);
4565 break;
4566
4567 default:
4568 do_compare:
4569 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
4570 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
4571 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
4572 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
4573 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
4574 {
4575 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
4576 itype
4577 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
4578 }
4579 else
4580 itype = TREE_TYPE (rhs1);
4581 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
4582 if (trueval == NULL_TREE)
4583 trueval = build_int_cst (itype, 1);
4584 else
4585 gcc_checking_assert (useless_type_conversion_p (itype,
4586 TREE_TYPE (trueval)));
4587 pattern_stmt
4588 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4589 COND_EXPR, cond_expr, trueval,
4590 build_int_cst (itype, 0));
4591 break;
4592 }
4593
4594 gimple_set_location (pattern_stmt, loc);
4595 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
4596 get_vectype_for_scalar_type (vinfo, itype));
4597 defs.put (var, gimple_assign_lhs (pattern_stmt));
4598 }
4599
4600 /* Comparison function to qsort a vector of gimple stmts after UID. */
4601
4602 static int
4603 sort_after_uid (const void *p1, const void *p2)
4604 {
4605 const gimple *stmt1 = *(const gimple * const *)p1;
4606 const gimple *stmt2 = *(const gimple * const *)p2;
4607 return gimple_uid (stmt1) - gimple_uid (stmt2);
4608 }
4609
4610 /* Create pattern stmts for all stmts participating in the bool pattern
4611 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
4612 OUT_TYPE. Return the def of the pattern root. */
4613
4614 static tree
4615 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
4616 tree out_type, stmt_vec_info stmt_info)
4617 {
4618 /* Gather original stmts in the bool pattern in their order of appearance
4619 in the IL. */
4620 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
4621 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
4622 i != bool_stmt_set.end (); ++i)
4623 bool_stmts.quick_push (*i);
4624 bool_stmts.qsort (sort_after_uid);
4625
4626 /* Now process them in that order, producing pattern stmts. */
4627 hash_map <tree, tree> defs;
4628 for (unsigned i = 0; i < bool_stmts.length (); ++i)
4629 adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
4630 out_type, stmt_info, defs);
4631
4632 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
4633 gimple *pattern_stmt
4634 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4635 return gimple_assign_lhs (pattern_stmt);
4636 }
4637
4638 /* Return the proper type for converting bool VAR into
4639 an integer value or NULL_TREE if no such type exists.
4640 The type is chosen so that the converted value has the
4641 same number of elements as VAR's vector type. */
4642
4643 static tree
4644 integer_type_for_mask (tree var, vec_info *vinfo)
4645 {
4646 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4647 return NULL_TREE;
4648
4649 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
4650 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
4651 return NULL_TREE;
4652
4653 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
4654 }
4655
4656 /* Function vect_recog_bool_pattern
4657
4658 Try to find pattern like following:
4659
4660 bool a_b, b_b, c_b, d_b, e_b;
4661 TYPE f_T;
4662 loop:
4663 S1 a_b = x1 CMP1 y1;
4664 S2 b_b = x2 CMP2 y2;
4665 S3 c_b = a_b & b_b;
4666 S4 d_b = x3 CMP3 y3;
4667 S5 e_b = c_b | d_b;
4668 S6 f_T = (TYPE) e_b;
4669
4670 where type 'TYPE' is an integral type. Or a similar pattern
4671 ending in
4672
4673 S6 f_Y = e_b ? r_Y : s_Y;
4674
4675 as results from if-conversion of a complex condition.
4676
4677 Input:
4678
4679 * STMT_VINFO: The stmt at the end from which the pattern
4680 search begins, i.e. cast of a bool to
4681 an integer type.
4682
4683 Output:
4684
4685 * TYPE_OUT: The type of the output of this pattern.
4686
4687 * Return value: A new stmt that will be used to replace the pattern.
4688
4689 Assuming size of TYPE is the same as size of all comparisons
4690 (otherwise some casts would be added where needed), for the above
4691 sequence we create the related pattern stmts:
4692 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4693 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4694 S4' d_T = x3 CMP3 y3 ? 1 : 0;
4695 S5' e_T = c_T | d_T;
4696 S6' f_T = e_T;
4697
4698 Instead of the above S3' we could emit:
4699 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4700 S3' c_T = a_T | b_T;
4701 but the above is more efficient. */
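/* For illustration, a C loop producing such a bool-to-integer cast could be
   (variable names are arbitrary):

     void
     f (int *out, const int *x, const int *y, const int *z, int n)
     {
       for (int i = 0; i < n; i++)
         {
           _Bool a = x[i] < y[i];
           _Bool b = y[i] < z[i];
           out[i] = a & b;
         }
     }

   where the final assignment is the S6-style widening of a bool value.  */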
4702
4703 static gimple *
4704 vect_recog_bool_pattern (vec_info *vinfo,
4705 stmt_vec_info stmt_vinfo, tree *type_out)
4706 {
4707 gimple *last_stmt = stmt_vinfo->stmt;
4708 enum tree_code rhs_code;
4709 tree var, lhs, rhs, vectype;
4710 gimple *pattern_stmt;
4711
4712 if (!is_gimple_assign (last_stmt))
4713 return NULL;
4714
4715 var = gimple_assign_rhs1 (last_stmt);
4716 lhs = gimple_assign_lhs (last_stmt);
4717 rhs_code = gimple_assign_rhs_code (last_stmt);
4718
4719 if (rhs_code == VIEW_CONVERT_EXPR)
4720 var = TREE_OPERAND (var, 0);
4721
4722 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4723 return NULL;
4724
4725 hash_set<gimple *> bool_stmts;
4726
4727 if (CONVERT_EXPR_CODE_P (rhs_code)
4728 || rhs_code == VIEW_CONVERT_EXPR)
4729 {
4730 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4731 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4732 return NULL;
4733 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4734
4735 if (check_bool_pattern (var, vinfo, bool_stmts))
4736 {
4737 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4738 TREE_TYPE (lhs), stmt_vinfo);
4739 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4740 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4741 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4742 else
4743 pattern_stmt
4744 = gimple_build_assign (lhs, NOP_EXPR, rhs);
4745 }
4746 else
4747 {
4748 tree type = integer_type_for_mask (var, vinfo);
4749 tree cst0, cst1, tmp;
4750
4751 if (!type)
4752 return NULL;
4753
4754 /* We may use the cond directly with the narrowed type, avoiding
4755 multiple cond exprs followed by result packing, and perform
4756 a single cond with a packed mask instead. In the widening
4757 case it is better to do the cond first and then extract
4758 the results. */
4759 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4760 type = TREE_TYPE (lhs);
4761
4762 cst0 = build_int_cst (type, 0);
4763 cst1 = build_int_cst (type, 1);
4764 tmp = vect_recog_temp_ssa_var (type, NULL);
4765 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4766
4767 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4768 {
4769 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
4770 append_pattern_def_seq (vinfo, stmt_vinfo,
4771 pattern_stmt, new_vectype);
4772
4773 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4774 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4775 }
4776 }
4777
4778 *type_out = vectype;
4779 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4780
4781 return pattern_stmt;
4782 }
4783 else if (rhs_code == COND_EXPR
4784 && TREE_CODE (var) == SSA_NAME)
4785 {
4786 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4787 if (vectype == NULL_TREE)
4788 return NULL;
4789
4790 /* Build a scalar type for the boolean result that when
4791 vectorized matches the vector type of the result in
4792 size and number of elements. */
4793 unsigned prec
4794 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4795 TYPE_VECTOR_SUBPARTS (vectype));
4796
4797 tree type
4798 = build_nonstandard_integer_type (prec,
4799 TYPE_UNSIGNED (TREE_TYPE (var)));
4800 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
4801 return NULL;
4802
4803 if (check_bool_pattern (var, vinfo, bool_stmts))
4804 var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
4805 else if (integer_type_for_mask (var, vinfo))
4806 return NULL;
4807
4808 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4809 pattern_stmt
4810 = gimple_build_assign (lhs, COND_EXPR,
4811 build2 (NE_EXPR, boolean_type_node,
4812 var, build_int_cst (TREE_TYPE (var), 0)),
4813 gimple_assign_rhs2 (last_stmt),
4814 gimple_assign_rhs3 (last_stmt));
4815 *type_out = vectype;
4816 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4817
4818 return pattern_stmt;
4819 }
4820 else if (rhs_code == SSA_NAME
4821 && STMT_VINFO_DATA_REF (stmt_vinfo))
4822 {
4823 stmt_vec_info pattern_stmt_info;
4824 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4825 if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
4826 return NULL;
4827
4828 if (check_bool_pattern (var, vinfo, bool_stmts))
4829 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4830 TREE_TYPE (vectype), stmt_vinfo);
4831 else
4832 {
4833 tree type = integer_type_for_mask (var, vinfo);
4834 tree cst0, cst1, new_vectype;
4835
4836 if (!type)
4837 return NULL;
4838
4839 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4840 type = TREE_TYPE (vectype);
4841
4842 cst0 = build_int_cst (type, 0);
4843 cst1 = build_int_cst (type, 1);
4844 new_vectype = get_vectype_for_scalar_type (vinfo, type);
4845
4846 rhs = vect_recog_temp_ssa_var (type, NULL);
4847 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4848 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
4849 }
4850
4851 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4852 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4853 {
4854 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4855 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4856 append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
4857 rhs = rhs2;
4858 }
4859 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4860 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4861 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4862 *type_out = vectype;
4863 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4864
4865 return pattern_stmt;
4866 }
4867 else
4868 return NULL;
4869 }
4870
4871
4872 /* A helper for vect_recog_mask_conversion_pattern. Build
4873 conversion of MASK to a type suitable for masking VECTYPE.
4874 The built statement gets the required vectype and is appended
4875 to the pattern sequence of STMT_VINFO.
4876
4877 Return converted mask. */
4878
4879 static tree
4880 build_mask_conversion (vec_info *vinfo,
4881 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4882 {
4883 gimple *stmt;
4884 tree masktype, tmp;
4885
4886 masktype = truth_type_for (vectype);
4887 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4888 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4889 append_pattern_def_seq (vinfo, stmt_vinfo,
4890 stmt, masktype, TREE_TYPE (vectype));
4891
4892 return tmp;
4893 }
4894
4895
4896 /* Function vect_recog_mask_conversion_pattern
4897
4898 Try to find statements which require boolean type
4899 conversion. Additional conversion statements are
4900 added to handle such cases. For example:
4901
4902 bool m_1, m_2, m_3;
4903 int i_4, i_5;
4904 double d_6, d_7;
4905 char c_1, c_2, c_3;
4906
4907 S1 m_1 = i_4 > i_5;
4908 S2 m_2 = d_6 < d_7;
4909 S3 m_3 = m_1 & m_2;
4910 S4 c_1 = m_3 ? c_2 : c_3;
4911
4912 Will be transformed into:
4913
4914 S1 m_1 = i_4 > i_5;
4915 S2 m_2 = d_6 < d_7;
4916 S3'' m_2' = (_Bool[bitsize=32])m_2
4917 S3' m_3' = m_1 & m_2';
4918 S4'' m_3'' = (_Bool[bitsize=8])m_3'
4919 S4' c_1' = m_3'' ? c_2 : c_3; */
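/* For illustration, masks of different widths as in the example above can
   come from source such as (names arbitrary):

     void
     f (char *c1, const int *i4, const int *i5, const double *d6,
        const double *d7, const char *c2, const char *c3, int n)
     {
       for (int j = 0; j < n; j++)
         c1[j] = i4[j] > i5[j] && d6[j] < d7[j] ? c2[j] : c3[j];
     }

   where the int and double comparisons produce masks with different numbers
   of elements per vector, which must be converted before being combined and
   used to select the char result.  */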
4920
4921 static gimple *
4922 vect_recog_mask_conversion_pattern (vec_info *vinfo,
4923 stmt_vec_info stmt_vinfo, tree *type_out)
4924 {
4925 gimple *last_stmt = stmt_vinfo->stmt;
4926 enum tree_code rhs_code;
4927 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4928 tree vectype1, vectype2;
4929 stmt_vec_info pattern_stmt_info;
4930 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
4931 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
4932
4933 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
4934 if (is_gimple_call (last_stmt)
4935 && gimple_call_internal_p (last_stmt))
4936 {
4937 gcall *pattern_stmt;
4938
4939 internal_fn ifn = gimple_call_internal_fn (last_stmt);
4940 int mask_argno = internal_fn_mask_index (ifn);
4941 if (mask_argno < 0)
4942 return NULL;
4943
4944 bool store_p = internal_store_fn_p (ifn);
4945 if (store_p)
4946 {
4947 int rhs_index = internal_fn_stored_value_index (ifn);
4948 tree rhs = gimple_call_arg (last_stmt, rhs_index);
4949 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
4950 }
4951 else
4952 {
4953 lhs = gimple_call_lhs (last_stmt);
4954 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4955 }
4956
4957 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4958 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
4959 if (!mask_arg_type)
4960 return NULL;
4961 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
4962
4963 if (!vectype1 || !vectype2
4964 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4965 TYPE_VECTOR_SUBPARTS (vectype2)))
4966 return NULL;
4967
4968 tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
4969
4970 auto_vec<tree, 8> args;
4971 unsigned int nargs = gimple_call_num_args (last_stmt);
4972 args.safe_grow (nargs, true);
4973 for (unsigned int i = 0; i < nargs; ++i)
4974 args[i] = ((int) i == mask_argno
4975 ? tmp
4976 : gimple_call_arg (last_stmt, i));
4977 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
4978
4979 if (!store_p)
4980 {
4981 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4982 gimple_call_set_lhs (pattern_stmt, lhs);
4983 }
4984 gimple_call_set_nothrow (pattern_stmt, true);
4985
4986 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4987 if (STMT_VINFO_DATA_REF (stmt_vinfo))
4988 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4989
4990 *type_out = vectype1;
4991 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4992
4993 return pattern_stmt;
4994 }
4995
4996 if (!is_gimple_assign (last_stmt))
4997 return NULL;
4998
4999 gimple *pattern_stmt;
5000 lhs = gimple_assign_lhs (last_stmt);
5001 rhs1 = gimple_assign_rhs1 (last_stmt);
5002 rhs_code = gimple_assign_rhs_code (last_stmt);
5003
5004 /* Check for cond expression requiring mask conversion. */
5005 if (rhs_code == COND_EXPR)
5006 {
5007 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5008
5009 if (TREE_CODE (rhs1) == SSA_NAME)
5010 {
5011 rhs1_type = integer_type_for_mask (rhs1, vinfo);
5012 if (!rhs1_type)
5013 return NULL;
5014 }
5015 else if (COMPARISON_CLASS_P (rhs1))
5016 {
5017 /* Check whether we're comparing scalar booleans and (if so)
5018 whether a better mask type exists than the mask associated
5019 with boolean-sized elements. This avoids unnecessary packs
5020 and unpacks if the booleans are set from comparisons of
5021 wider types. E.g. in:
5022
5023 int x1, x2, x3, x4, y1, y2;
5024 ...
5025 bool b1 = (x1 == x2);
5026 bool b2 = (x3 == x4);
5027 ... = b1 == b2 ? y1 : y2;
5028
5029 it is better for b1 and b2 to use the mask type associated
5030 with int elements rather than bool (byte) elements. */
5031 rhs1_op0 = TREE_OPERAND (rhs1, 0);
5032 rhs1_op1 = TREE_OPERAND (rhs1, 1);
5033 if (!rhs1_op0 || !rhs1_op1)
5034 return NULL;
5035 rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
5036 rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
5037
5038 if (!rhs1_op0_type)
5039 rhs1_type = TREE_TYPE (rhs1_op0);
5040 else if (!rhs1_op1_type)
5041 rhs1_type = TREE_TYPE (rhs1_op1);
5042 else if (TYPE_PRECISION (rhs1_op0_type)
5043 != TYPE_PRECISION (rhs1_op1_type))
5044 {
5045 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
5046 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
5047 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
5048 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
5049 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
5050 {
5051 if (abs (tmp0) > abs (tmp1))
5052 rhs1_type = rhs1_op1_type;
5053 else
5054 rhs1_type = rhs1_op0_type;
5055 }
5056 else
5057 rhs1_type = build_nonstandard_integer_type
5058 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
5059 }
5060 else
5061 rhs1_type = rhs1_op0_type;
5062 }
5063 else
5064 return NULL;
5065
5066 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
5067
5068 if (!vectype1 || !vectype2)
5069 return NULL;
5070
5071 /* Continue if a conversion is needed. Also continue if we have
5072 a comparison whose vector type would normally be different from
5073 VECTYPE2 when considered in isolation. In that case we'll
5074 replace the comparison with an SSA name (so that we can record
5075 its vector type) and behave as though the comparison was an SSA
5076 name from the outset. */
5077 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
5078 TYPE_VECTOR_SUBPARTS (vectype2))
5079 && !rhs1_op0_type
5080 && !rhs1_op1_type)
5081 return NULL;
5082
5083       /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
5084 	 in place; we can handle it in vectorizable_condition.  This avoids
5085 	 unnecessary promotion stmts and an increased vectorization factor. */
5086 if (COMPARISON_CLASS_P (rhs1)
5087 && INTEGRAL_TYPE_P (rhs1_type)
5088 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
5089 TYPE_VECTOR_SUBPARTS (vectype2)))
5090 {
5091 enum vect_def_type dt;
5092 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
5093 && dt == vect_external_def
5094 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
5095 && (dt == vect_external_def
5096 || dt == vect_constant_def))
5097 {
5098 tree wide_scalar_type = build_nonstandard_integer_type
5099 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
5100 tree vectype3 = get_vectype_for_scalar_type (vinfo,
5101 wide_scalar_type);
5102 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
5103 return NULL;
5104 }
5105 }
5106
5107 /* If rhs1 is a comparison we need to move it into a
5108 separate statement. */
5109 if (TREE_CODE (rhs1) != SSA_NAME)
5110 {
5111 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
5112 if (rhs1_op0_type
5113 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
5114 rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
5115 vectype2, stmt_vinfo);
5116 if (rhs1_op1_type
5117 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
5118 rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
5119 vectype2, stmt_vinfo);
5120 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
5121 rhs1_op0, rhs1_op1);
5122 rhs1 = tmp;
5123 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
5124 rhs1_type);
5125 }
5126
5127 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
5128 TYPE_VECTOR_SUBPARTS (vectype2)))
5129 tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
5130 else
5131 tmp = rhs1;
5132
5133 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5134 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
5135 gimple_assign_rhs2 (last_stmt),
5136 gimple_assign_rhs3 (last_stmt));
5137
5138 *type_out = vectype1;
5139 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
5140
5141 return pattern_stmt;
5142 }
5143
5144 /* Now check for binary boolean operations requiring conversion for
5145      one of the operands. */
5146 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5147 return NULL;
5148
5149 if (rhs_code != BIT_IOR_EXPR
5150 && rhs_code != BIT_XOR_EXPR
5151 && rhs_code != BIT_AND_EXPR
5152 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
5153 return NULL;
5154
5155 rhs2 = gimple_assign_rhs2 (last_stmt);
5156
5157 rhs1_type = integer_type_for_mask (rhs1, vinfo);
5158 rhs2_type = integer_type_for_mask (rhs2, vinfo);
5159
5160 if (!rhs1_type || !rhs2_type
5161 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
5162 return NULL;
5163
5164 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
5165 {
5166 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
5167 if (!vectype1)
5168 return NULL;
5169 rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
5170 }
5171 else
5172 {
5173 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
5174 if (!vectype1)
5175 return NULL;
5176 rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
5177 }
5178
5179 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5180 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
5181
5182 *type_out = vectype1;
5183 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
5184
5185 return pattern_stmt;
5186 }
5187
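/* Illustrative example (a sketch, not taken from the sources): with
   128-bit vectors, a loop body such as

     int a[N]; double b[N], c[N];
     c[i] = a[i] > 0 ? b[i] : 0.0;

   compares four ints per vector but selects only two doubles per vector,
   so the mask produced by the comparison has a different number of
   elements from the one the COND_EXPR needs.  The pattern above records
   an explicit mask conversion so that vectorizable_condition sees a
   condition mask with the same number of subparts as the data vectors.  */
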
5188 /* STMT_INFO is a load or store. If the load or store is conditional, return
5189 the boolean condition under which it occurs, otherwise return null. */
5190
5191 static tree
5192 vect_get_load_store_mask (stmt_vec_info stmt_info)
5193 {
5194 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
5195 {
5196 gcc_assert (gimple_assign_single_p (def_assign));
5197 return NULL_TREE;
5198 }
5199
5200 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
5201 {
5202 internal_fn ifn = gimple_call_internal_fn (def_call);
5203 int mask_index = internal_fn_mask_index (ifn);
5204 return gimple_call_arg (def_call, mask_index);
5205 }
5206
5207 gcc_unreachable ();
5208 }
5209
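/* For illustration only: for a conditional access that has already been
   rewritten as an internal call, e.g. something of the form

     _7 = .MASK_LOAD (ptr_3, 32B, mask_5);

   the function above returns mask_5, whereas an ordinary unconditional
   load or store assignment yields NULL_TREE.  */
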
5210 /* Return MASK if MASK is suitable for masking an operation on vectors
5211 of type VECTYPE, otherwise convert it into such a form and return
5212 the result. Associate any conversion statements with STMT_INFO's
5213 pattern. */
5214
5215 static tree
5216 vect_convert_mask_for_vectype (tree mask, tree vectype,
5217 stmt_vec_info stmt_info, vec_info *vinfo)
5218 {
5219 tree mask_type = integer_type_for_mask (mask, vinfo);
5220 if (mask_type)
5221 {
5222 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
5223 if (mask_vectype
5224 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
5225 TYPE_VECTOR_SUBPARTS (mask_vectype)))
5226 mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
5227 }
5228 return mask;
5229 }
5230
5231 /* Return the equivalent of:
5232
5233 fold_convert (TYPE, VALUE)
5234
5235 with the expectation that the operation will be vectorized.
5236 If new statements are needed, add them as pattern statements
5237 to STMT_INFO. */
5238
5239 static tree
5240 vect_add_conversion_to_pattern (vec_info *vinfo,
5241 tree type, tree value, stmt_vec_info stmt_info)
5242 {
5243 if (useless_type_conversion_p (type, TREE_TYPE (value)))
5244 return value;
5245
5246 tree new_value = vect_recog_temp_ssa_var (type, NULL);
5247 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
5248 append_pattern_def_seq (vinfo, stmt_info, conversion,
5249 get_vectype_for_scalar_type (vinfo, type));
5250 return new_value;
5251 }
5252
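/* Hypothetical use of the helper above (off_type, idx and patt_off are
   made-up names): if a gather uses a 32-bit index idx but the chosen
   offset vector type has 64-bit elements, then

     offset = vect_add_conversion_to_pattern (vinfo, off_type, idx, stmt_info);

   records a pattern statement equivalent to "patt_off = (long) idx" and
   returns patt_off; if the types already match, idx is returned
   unchanged.  */
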
5253 /* Try to convert STMT_INFO into a call to a gather load or scatter store
5254 internal function. Return the final statement on success and set
5255 *TYPE_OUT to the vector type being loaded or stored.
5256
5257 This function only handles gathers and scatters that were recognized
5258 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
5259
5260 static gimple *
5261 vect_recog_gather_scatter_pattern (vec_info *vinfo,
5262 stmt_vec_info stmt_info, tree *type_out)
5263 {
5264 /* Currently we only support this for loop vectorization. */
5265 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5266 if (!loop_vinfo)
5267 return NULL;
5268
5269 /* Make sure that we're looking at a gather load or scatter store. */
5270 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
5271 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5272 return NULL;
5273
5274 /* Get the boolean that controls whether the load or store happens.
5275 This is null if the operation is unconditional. */
5276 tree mask = vect_get_load_store_mask (stmt_info);
5277
5278 /* Make sure that the target supports an appropriate internal
5279 function for the gather/scatter operation. */
5280 gather_scatter_info gs_info;
5281 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
5282 || gs_info.ifn == IFN_LAST)
5283 return NULL;
5284
5285 /* Convert the mask to the right form. */
5286 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
5287 gs_info.element_type);
5288 if (mask)
5289 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
5290 loop_vinfo);
5291 else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
5292 || gs_info.ifn == IFN_MASK_GATHER_LOAD)
5293 mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
5294
5295 /* Get the invariant base and non-invariant offset, converting the
5296 latter to the same width as the vector elements. */
5297 tree base = gs_info.base;
5298 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
5299 tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
5300 gs_info.offset, stmt_info);
5301
5302 /* Build the new pattern statement. */
5303 tree scale = size_int (gs_info.scale);
5304 gcall *pattern_stmt;
5305 if (DR_IS_READ (dr))
5306 {
5307 tree zero = build_zero_cst (gs_info.element_type);
5308 if (mask != NULL)
5309 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
5310 offset, scale, zero, mask);
5311 else
5312 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
5313 offset, scale, zero);
5314 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
5315 gimple_call_set_lhs (pattern_stmt, load_lhs);
5316 }
5317 else
5318 {
5319 tree rhs = vect_get_store_rhs (stmt_info);
5320 if (mask != NULL)
5321 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
5322 base, offset, scale, rhs,
5323 mask);
5324 else
5325 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
5326 base, offset, scale, rhs);
5327 }
5328 gimple_call_set_nothrow (pattern_stmt, true);
5329
5330 /* Copy across relevant vectorization info and associate DR with the
5331 new pattern statement instead of the original statement. */
5332 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
5333 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
5334
5335 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5336 *type_out = vectype;
5337 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
5338
5339 return pattern_stmt;
5340 }
5341
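/* Illustrative sketch of the pattern above (the names are made up): a
   recognized gather load such as

     for (i = 0; i < n; i++)
       res[i] = data[idx[i]];

   is replaced, when the target provides a suitable internal function,
   by a pattern call of roughly the form

     patt_l = .GATHER_LOAD (&data, idx_i, scale, 0);

   where idx_i has first been converted to the offset element type and,
   for the masked variants, the (converted or all-ones) mask is passed
   as the final argument.  */
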
5342 /* Return true if TYPE is a non-boolean integer type. These are the types
5343 that we want to consider for narrowing. */
5344
5345 static bool
5346 vect_narrowable_type_p (tree type)
5347 {
5348 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
5349 }
5350
5351 /* Return true if the operation given by CODE can be truncated to N bits
5352 when only N bits of the output are needed. This is only true if bit N+1
5353 of the inputs has no effect on the low N bits of the result. */
5354
5355 static bool
5356 vect_truncatable_operation_p (tree_code code)
5357 {
5358 switch (code)
5359 {
5360 case PLUS_EXPR:
5361 case MINUS_EXPR:
5362 case MULT_EXPR:
5363 case BIT_AND_EXPR:
5364 case BIT_IOR_EXPR:
5365 case BIT_XOR_EXPR:
5366 case COND_EXPR:
5367 return true;
5368
5369 default:
5370 return false;
5371 }
5372 }
5373
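/* For example (illustration only), PLUS_EXPR is truncatable because

     (unsigned char) (a + b)
       == (unsigned char) ((unsigned char) a + (unsigned char) b)

   for any 32-bit a and b, whereas TRUNC_DIV_EXPR and RSHIFT_EXPR are
   not in the list above: the low bits of a / b and a >> n depend on the
   high bits of a, so those operations cannot simply be carried out in a
   narrower type.  */
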
5374 /* Record that STMT_INFO could be changed from operating on TYPE to
5375 operating on a type with the precision and sign given by PRECISION
5376 and SIGN respectively. PRECISION is an arbitrary bit precision;
5377 it might not be a whole number of bytes. */
5378
5379 static void
5380 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
5381 unsigned int precision, signop sign)
5382 {
5383 /* Round the precision up to a whole number of bytes. */
5384 precision = vect_element_precision (precision);
5385 if (precision < TYPE_PRECISION (type)
5386 && (!stmt_info->operation_precision
5387 || stmt_info->operation_precision > precision))
5388 {
5389 stmt_info->operation_precision = precision;
5390 stmt_info->operation_sign = sign;
5391 }
5392 }
5393
5394 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
5395 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
5396 is an arbitrary bit precision; it might not be a whole number of bytes. */
5397
5398 static void
5399 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
5400 unsigned int min_input_precision)
5401 {
5402 /* This operation in isolation only requires the inputs to have
5403      MIN_INPUT_PRECISION of precision.  However, that doesn't mean
5404 that MIN_INPUT_PRECISION is a natural precision for the chain
5405 as a whole. E.g. consider something like:
5406
5407 unsigned short *x, *y;
5408 *y = ((*x & 0xf0) >> 4) | (*y << 4);
5409
5410 The right shift can be done on unsigned chars, and only requires the
5411 result of "*x & 0xf0" to be done on unsigned chars. But taking that
5412 approach would mean turning a natural chain of single-vector unsigned
5413 short operations into one that truncates "*x" and then extends
5414 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
5415 operation and one vector for each unsigned char operation.
5416 This would be a significant pessimization.
5417
5418 Instead only propagate the maximum of this precision and the precision
5419 required by the users of the result. This means that we don't pessimize
5420 the case above but continue to optimize things like:
5421
5422 unsigned char *y;
5423 unsigned short *x;
5424 *y = ((*x & 0xf0) >> 4) | (*y << 4);
5425
5426 Here we would truncate two vectors of *x to a single vector of
5427 unsigned chars and use single-vector unsigned char operations for
5428 everything else, rather than doing two unsigned short copies of
5429 "(*x & 0xf0) >> 4" and then truncating the result. */
5430 min_input_precision = MAX (min_input_precision,
5431 stmt_info->min_output_precision);
5432
5433 if (min_input_precision < TYPE_PRECISION (type)
5434 && (!stmt_info->min_input_precision
5435 || stmt_info->min_input_precision > min_input_precision))
5436 stmt_info->min_input_precision = min_input_precision;
5437 }
5438
5439 /* Subroutine of vect_determine_min_output_precision. Return true if
5440 we can calculate a reduced number of output bits for STMT_INFO,
5441 whose result is LHS. */
5442
5443 static bool
5444 vect_determine_min_output_precision_1 (vec_info *vinfo,
5445 stmt_vec_info stmt_info, tree lhs)
5446 {
5447 /* Take the maximum precision required by users of the result. */
5448 unsigned int precision = 0;
5449 imm_use_iterator iter;
5450 use_operand_p use;
5451 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
5452 {
5453 gimple *use_stmt = USE_STMT (use);
5454 if (is_gimple_debug (use_stmt))
5455 continue;
5456 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
5457 if (!use_stmt_info || !use_stmt_info->min_input_precision)
5458 return false;
5459 /* The input precision recorded for COND_EXPRs applies only to the
5460 "then" and "else" values. */
5461 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
5462 if (assign
5463 && gimple_assign_rhs_code (assign) == COND_EXPR
5464 && use->use != gimple_assign_rhs2_ptr (assign)
5465 && use->use != gimple_assign_rhs3_ptr (assign))
5466 return false;
5467 precision = MAX (precision, use_stmt_info->min_input_precision);
5468 }
5469
5470 if (dump_enabled_p ())
5471 dump_printf_loc (MSG_NOTE, vect_location,
5472 "only the low %d bits of %T are significant\n",
5473 precision, lhs);
5474 stmt_info->min_output_precision = precision;
5475 return true;
5476 }
5477
5478 /* Calculate min_output_precision for STMT_INFO. */
5479
5480 static void
5481 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5482 {
5483 /* We're only interested in statements with a narrowable result. */
5484 tree lhs = gimple_get_lhs (stmt_info->stmt);
5485 if (!lhs
5486 || TREE_CODE (lhs) != SSA_NAME
5487 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
5488 return;
5489
5490 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
5491 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
5492 }
5493
5494 /* Use range information to decide whether STMT (described by STMT_INFO)
5495 could be done in a narrower type. This is effectively a forward
5496 propagation, since it uses context-independent information that applies
5497 to all users of an SSA name. */
5498
5499 static void
5500 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
5501 {
5502 tree lhs = gimple_assign_lhs (stmt);
5503 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
5504 return;
5505
5506 tree type = TREE_TYPE (lhs);
5507 if (!vect_narrowable_type_p (type))
5508 return;
5509
5510 /* First see whether we have any useful range information for the result. */
5511 unsigned int precision = TYPE_PRECISION (type);
5512 signop sign = TYPE_SIGN (type);
5513 wide_int min_value, max_value;
5514 if (!vect_get_range_info (lhs, &min_value, &max_value))
5515 return;
5516
5517 tree_code code = gimple_assign_rhs_code (stmt);
5518 unsigned int nops = gimple_num_ops (stmt);
5519
5520 if (!vect_truncatable_operation_p (code))
5521 /* Check that all relevant input operands are compatible, and update
5522 [MIN_VALUE, MAX_VALUE] to include their ranges. */
5523 for (unsigned int i = 1; i < nops; ++i)
5524 {
5525 tree op = gimple_op (stmt, i);
5526 if (TREE_CODE (op) == INTEGER_CST)
5527 {
5528 	    /* Don't require the integer to have TYPE (which it might
5529 not for things like shift amounts, etc.), but do require it
5530 to fit the type. */
5531 if (!int_fits_type_p (op, type))
5532 return;
5533
5534 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
5535 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
5536 }
5537 else if (TREE_CODE (op) == SSA_NAME)
5538 {
5539 /* Ignore codes that don't take uniform arguments. */
5540 if (!types_compatible_p (TREE_TYPE (op), type))
5541 return;
5542
5543 wide_int op_min_value, op_max_value;
5544 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
5545 return;
5546
5547 min_value = wi::min (min_value, op_min_value, sign);
5548 max_value = wi::max (max_value, op_max_value, sign);
5549 }
5550 else
5551 return;
5552 }
5553
5554 /* Try to switch signed types for unsigned types if we can.
5555 This is better for two reasons. First, unsigned ops tend
5556 to be cheaper than signed ops. Second, it means that we can
5557 handle things like:
5558
5559 signed char c;
5560 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
5561
5562 as:
5563
5564 signed char c;
5565 unsigned short res_1 = (unsigned short) c & 0xff00;
5566 int res = (int) res_1;
5567
5568 where the intermediate result res_1 has unsigned rather than
5569 signed type. */
5570 if (sign == SIGNED && !wi::neg_p (min_value))
5571 sign = UNSIGNED;
5572
5573 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
5574 unsigned int precision1 = wi::min_precision (min_value, sign);
5575 unsigned int precision2 = wi::min_precision (max_value, sign);
5576 unsigned int value_precision = MAX (precision1, precision2);
5577 if (value_precision >= precision)
5578 return;
5579
5580 if (dump_enabled_p ())
5581 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5582 " without loss of precision: %G",
5583 sign == SIGNED ? "signed" : "unsigned",
5584 value_precision, (gimple *) stmt);
5585
5586 vect_set_operation_type (stmt_info, type, value_precision, sign);
5587 vect_set_min_input_precision (stmt_info, type, value_precision);
5588 }
5589
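/* Illustrative example (not from the sources): given

     unsigned char *x;
     int tmp = x[i] * 3;

   the result is known to be in the range [0, 765] and so needs only 10
   bits, meaning the statement can be recorded as computable in 16-bit
   unsigned arithmetic, which typically doubles the number of elements
   processed per vector compared with doing the work on 32-bit ints.  */
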
5590 /* Use information about the users of STMT's result to decide whether
5591 STMT (described by STMT_INFO) could be done in a narrower type.
5592 This is effectively a backward propagation. */
5593
5594 static void
5595 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
5596 {
5597 tree_code code = gimple_assign_rhs_code (stmt);
5598 unsigned int opno = (code == COND_EXPR ? 2 : 1);
5599 tree type = TREE_TYPE (gimple_op (stmt, opno));
5600 if (!vect_narrowable_type_p (type))
5601 return;
5602
5603 unsigned int precision = TYPE_PRECISION (type);
5604 unsigned int operation_precision, min_input_precision;
5605 switch (code)
5606 {
5607 CASE_CONVERT:
5608 /* Only the bits that contribute to the output matter. Don't change
5609 the precision of the operation itself. */
5610 operation_precision = precision;
5611 min_input_precision = stmt_info->min_output_precision;
5612 break;
5613
5614 case LSHIFT_EXPR:
5615 case RSHIFT_EXPR:
5616 {
5617 tree shift = gimple_assign_rhs2 (stmt);
5618 if (TREE_CODE (shift) != INTEGER_CST
5619 || !wi::ltu_p (wi::to_widest (shift), precision))
5620 return;
5621 unsigned int const_shift = TREE_INT_CST_LOW (shift);
5622 if (code == LSHIFT_EXPR)
5623 {
5624 /* Avoid creating an undefined shift.
5625
5626 ??? We could instead use min_output_precision as-is and
5627 optimize out-of-range shifts to zero. However, only
5628 degenerate testcases shift away all their useful input data,
5629 and it isn't natural to drop input operations in the middle
5630 of vectorization. This sort of thing should really be
5631 handled before vectorization. */
5632 operation_precision = MAX (stmt_info->min_output_precision,
5633 const_shift + 1);
5634 /* We need CONST_SHIFT fewer bits of the input. */
5635 min_input_precision = (MAX (operation_precision, const_shift)
5636 - const_shift);
5637 }
5638 else
5639 {
5640 /* We need CONST_SHIFT extra bits to do the operation. */
5641 operation_precision = (stmt_info->min_output_precision
5642 + const_shift);
5643 min_input_precision = operation_precision;
5644 }
5645 break;
5646 }
5647
5648 default:
5649 if (vect_truncatable_operation_p (code))
5650 {
5651 /* Input bit N has no effect on output bits N-1 and lower. */
5652 operation_precision = stmt_info->min_output_precision;
5653 min_input_precision = operation_precision;
5654 break;
5655 }
5656 return;
5657 }
5658
5659 if (operation_precision < precision)
5660 {
5661 if (dump_enabled_p ())
5662 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5663 " without affecting users: %G",
5664 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
5665 operation_precision, (gimple *) stmt);
5666 vect_set_operation_type (stmt_info, type, operation_precision,
5667 TYPE_SIGN (type));
5668 }
5669 vect_set_min_input_precision (stmt_info, type, min_input_precision);
5670 }
5671
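/* A hypothetical example of the backward propagation above: in

     unsigned int *x;
     unsigned char *y;
     *y = (unsigned char) (*x >> 3);

   only the low 8 bits of the shift result are used, so the RSHIFT_EXPR
   needs 8 + 3 = 11 bits of precision and therefore only 11 bits of *x;
   after rounding up to whole bytes the shift can be carried out on
   16-bit rather than 32-bit elements.  */
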
5672 /* Return true if the statement described by STMT_INFO sets a boolean
5673 SSA_NAME and if we know how to vectorize this kind of statement using
5674 vector mask types. */
5675
5676 static bool
5677 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
5678 {
5679 tree lhs = gimple_get_lhs (stmt_info->stmt);
5680 if (!lhs
5681 || TREE_CODE (lhs) != SSA_NAME
5682 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5683 return false;
5684
5685 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5686 {
5687 tree_code rhs_code = gimple_assign_rhs_code (assign);
5688 switch (rhs_code)
5689 {
5690 CASE_CONVERT:
5691 case SSA_NAME:
5692 case BIT_NOT_EXPR:
5693 case BIT_IOR_EXPR:
5694 case BIT_XOR_EXPR:
5695 case BIT_AND_EXPR:
5696 return true;
5697
5698 default:
5699 return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
5700 }
5701 }
5702 else if (is_a <gphi *> (stmt_info->stmt))
5703 return true;
5704 return false;
5705 }
5706
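/* For illustration (made-up source): statements such as

     _Bool b1 = a[i] < c[i];
     _Bool b2 = b1 & flag[i];

   set boolean SSA names via a comparison and a BIT_AND_EXPR, so both
   satisfy the predicate above and are candidates for being carried in
   vector mask form rather than as vectors of byte-sized booleans.  */
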
5707 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
5708 a vector mask type instead of a normal vector type. Record the
5709 result in STMT_INFO->mask_precision. */
5710
5711 static void
5712 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5713 {
5714 if (!possible_vector_mask_operation_p (stmt_info))
5715 return;
5716
5717 /* If at least one boolean input uses a vector mask type,
5718 pick the mask type with the narrowest elements.
5719
5720 ??? This is the traditional behavior. It should always produce
5721 the smallest number of operations, but isn't necessarily the
5722 optimal choice. For example, if we have:
5723
5724 a = b & c
5725
5726 where:
5727
5728 - the user of a wants it to have a mask type for 16-bit elements (M16)
5729 - b also uses M16
5730 - c uses a mask type for 8-bit elements (M8)
5731
5732 then picking M8 gives:
5733
5734 - 1 M16->M8 pack for b
5735 - 1 M8 AND for a
5736 - 2 M8->M16 unpacks for the user of a
5737
5738 whereas picking M16 would have given:
5739
5740 - 2 M8->M16 unpacks for c
5741 - 2 M16 ANDs for a
5742
5743      The number of operations is the same, but M16 would have given
5744 a shorter dependency chain and allowed more ILP. */
5745 unsigned int precision = ~0U;
5746 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5747 {
5748 unsigned int nops = gimple_num_ops (assign);
5749 for (unsigned int i = 1; i < nops; ++i)
5750 {
5751 tree rhs = gimple_op (assign, i);
5752 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
5753 continue;
5754
5755 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5756 if (!def_stmt_info)
5757 /* Don't let external or constant operands influence the choice.
5758 We can convert them to whichever vector type we pick. */
5759 continue;
5760
5761 if (def_stmt_info->mask_precision)
5762 {
5763 if (precision > def_stmt_info->mask_precision)
5764 precision = def_stmt_info->mask_precision;
5765 }
5766 }
5767
5768 /* If the statement compares two values that shouldn't use vector masks,
5769 try comparing the values as normal scalars instead. */
5770 tree_code rhs_code = gimple_assign_rhs_code (assign);
5771 if (precision == ~0U
5772 && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5773 {
5774 tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
5775 scalar_mode mode;
5776 tree vectype, mask_type;
5777 if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
5778 && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
5779 && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
5780 && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
5781 precision = GET_MODE_BITSIZE (mode);
5782 }
5783 }
5784 else
5785 {
5786 gphi *phi = as_a <gphi *> (stmt_info->stmt);
5787 for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
5788 {
5789 tree rhs = gimple_phi_arg_def (phi, i);
5790
5791 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5792 if (!def_stmt_info)
5793 /* Don't let external or constant operands influence the choice.
5794 We can convert them to whichever vector type we pick. */
5795 continue;
5796
5797 if (def_stmt_info->mask_precision)
5798 {
5799 if (precision > def_stmt_info->mask_precision)
5800 precision = def_stmt_info->mask_precision;
5801 }
5802 }
5803 }
5804
5805 if (dump_enabled_p ())
5806 {
5807 if (precision == ~0U)
5808 dump_printf_loc (MSG_NOTE, vect_location,
5809 "using normal nonmask vectors for %G",
5810 stmt_info->stmt);
5811 else
5812 dump_printf_loc (MSG_NOTE, vect_location,
5813 "using boolean precision %d for %G",
5814 precision, stmt_info->stmt);
5815 }
5816
5817 stmt_info->mask_precision = precision;
5818 }
5819
5820 /* Handle vect_determine_precisions for STMT_INFO, given that we
5821 have already done so for the users of its result. */
5822
5823 void
5824 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
5825 {
5826 vect_determine_min_output_precision (vinfo, stmt_info);
5827 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
5828 {
5829 vect_determine_precisions_from_range (stmt_info, stmt);
5830 vect_determine_precisions_from_users (stmt_info, stmt);
5831 }
5832 }
5833
5834 /* Walk backwards through the vectorizable region to determine the
5835 values of these fields:
5836
5837 - min_output_precision
5838 - min_input_precision
5839 - operation_precision
5840 - operation_sign. */
5841
5842 void
5843 vect_determine_precisions (vec_info *vinfo)
5844 {
5845 DUMP_VECT_SCOPE ("vect_determine_precisions");
5846
5847 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5848 {
5849 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5850 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5851 unsigned int nbbs = loop->num_nodes;
5852
5853 for (unsigned int i = 0; i < nbbs; i++)
5854 {
5855 basic_block bb = bbs[i];
5856 for (auto gsi = gsi_start_phis (bb);
5857 !gsi_end_p (gsi); gsi_next (&gsi))
5858 {
5859 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5860 if (stmt_info)
5861 vect_determine_mask_precision (vinfo, stmt_info);
5862 }
5863 for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5864 if (!is_gimple_debug (gsi_stmt (si)))
5865 vect_determine_mask_precision
5866 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5867 }
5868 for (unsigned int i = 0; i < nbbs; i++)
5869 {
5870 basic_block bb = bbs[nbbs - i - 1];
5871 for (gimple_stmt_iterator si = gsi_last_bb (bb);
5872 !gsi_end_p (si); gsi_prev (&si))
5873 if (!is_gimple_debug (gsi_stmt (si)))
5874 vect_determine_stmt_precisions
5875 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5876 for (auto gsi = gsi_start_phis (bb);
5877 !gsi_end_p (gsi); gsi_next (&gsi))
5878 {
5879 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5880 if (stmt_info)
5881 vect_determine_stmt_precisions (vinfo, stmt_info);
5882 }
5883 }
5884 }
5885 else
5886 {
5887 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5888 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5889 {
5890 basic_block bb = bb_vinfo->bbs[i];
5891 for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5892 {
5893 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5894 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5895 vect_determine_mask_precision (vinfo, stmt_info);
5896 }
5897 for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5898 {
5899 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5900 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5901 vect_determine_mask_precision (vinfo, stmt_info);
5902 }
5903 }
5904 for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
5905 {
5906 for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
5907 !gsi_end_p (gsi); gsi_prev (&gsi))
5908 {
5909 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5910 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5911 vect_determine_stmt_precisions (vinfo, stmt_info);
5912 }
5913 for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
5914 !gsi_end_p (gsi); gsi_next (&gsi))
5915 {
5916 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5917 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5918 vect_determine_stmt_precisions (vinfo, stmt_info);
5919 }
5920 }
5921 }
5922 }
5923
5924 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
5925
5926 struct vect_recog_func
5927 {
5928 vect_recog_func_ptr fn;
5929 const char *name;
5930 };
5931
5932 /* Note that ordering matters - the first pattern matching on a stmt is
5933    taken, which means that usually the more complex patterns need to precede
5934    the less complex ones (widen_sum only after dot_prod or sad, for example). */
5935 static vect_recog_func vect_vect_recog_func_ptrs[] = {
5936 { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
5937 { vect_recog_bit_insert_pattern, "bit_insert" },
5938 { vect_recog_over_widening_pattern, "over_widening" },
5939 /* Must come after over_widening, which narrows the shift as much as
5940 possible beforehand. */
5941 { vect_recog_average_pattern, "average" },
5942 { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
5943 { vect_recog_mulhs_pattern, "mult_high" },
5944 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
5945 { vect_recog_widen_mult_pattern, "widen_mult" },
5946 { vect_recog_dot_prod_pattern, "dot_prod" },
5947 { vect_recog_sad_pattern, "sad" },
5948 { vect_recog_widen_sum_pattern, "widen_sum" },
5949 { vect_recog_pow_pattern, "pow" },
5950 { vect_recog_popcount_pattern, "popcount" },
5951 { vect_recog_widen_shift_pattern, "widen_shift" },
5952 { vect_recog_rotate_pattern, "rotate" },
5953 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
5954 { vect_recog_divmod_pattern, "divmod" },
5955 { vect_recog_mult_pattern, "mult" },
5956 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
5957 { vect_recog_bool_pattern, "bool" },
5958 /* This must come before mask conversion, and includes the parts
5959 of mask conversion that are needed for gather and scatter
5960 internal functions. */
5961 { vect_recog_gather_scatter_pattern, "gather_scatter" },
5962 { vect_recog_mask_conversion_pattern, "mask_conversion" },
5963 { vect_recog_widen_plus_pattern, "widen_plus" },
5964 { vect_recog_widen_minus_pattern, "widen_minus" },
5965 };
5966
5967 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
5968
5969 /* Mark statements that are involved in a pattern. */
5970
5971 void
5972 vect_mark_pattern_stmts (vec_info *vinfo,
5973 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
5974 tree pattern_vectype)
5975 {
5976 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
5977 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5978
5979 gimple *orig_pattern_stmt = NULL;
5980 if (is_pattern_stmt_p (orig_stmt_info))
5981 {
5982 /* We're replacing a statement in an existing pattern definition
5983 sequence. */
5984 orig_pattern_stmt = orig_stmt_info->stmt;
5985 if (dump_enabled_p ())
5986 dump_printf_loc (MSG_NOTE, vect_location,
5987 "replacing earlier pattern %G", orig_pattern_stmt);
5988
5989 /* To keep the book-keeping simple, just swap the lhs of the
5990 old and new statements, so that the old one has a valid but
5991 unused lhs. */
5992 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
5993 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
5994 gimple_set_lhs (pattern_stmt, old_lhs);
5995
5996 if (dump_enabled_p ())
5997 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
5998
5999 /* Switch to the statement that ORIG replaces. */
6000 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
6001
6002 /* We shouldn't be replacing the main pattern statement. */
6003 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
6004 != orig_pattern_stmt);
6005 }
6006
6007 if (def_seq)
6008 for (gimple_stmt_iterator si = gsi_start (def_seq);
6009 !gsi_end_p (si); gsi_next (&si))
6010 {
6011 if (dump_enabled_p ())
6012 dump_printf_loc (MSG_NOTE, vect_location,
6013 "extra pattern stmt: %G", gsi_stmt (si));
6014 stmt_vec_info pattern_stmt_info
6015 = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
6016 orig_stmt_info, pattern_vectype);
6017 /* Stmts in the def sequence are not vectorizable cycle or
6018 	   induction defs; instead they should all be vect_internal_def,
6019 	   feeding the main pattern stmt, which retains this def type. */
6020 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
6021 }
6022
6023 if (orig_pattern_stmt)
6024 {
6025 vect_init_pattern_stmt (vinfo, pattern_stmt,
6026 orig_stmt_info, pattern_vectype);
6027
6028 /* Insert all the new pattern statements before the original one. */
6029 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
6030 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
6031 orig_def_seq);
6032 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
6033 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
6034
6035 /* Remove the pattern statement that this new pattern replaces. */
6036 gsi_remove (&gsi, false);
6037 }
6038 else
6039 vect_set_pattern_stmt (vinfo,
6040 pattern_stmt, orig_stmt_info, pattern_vectype);
6041
6042 /* Transfer reduction path info to the pattern. */
6043 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
6044 {
6045 gimple_match_op op;
6046 if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
6047 gcc_unreachable ();
6048 tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
6049 /* Search the pattern def sequence and the main pattern stmt. Note
6050 we may have inserted all into a containing pattern def sequence
6051 so the following is a bit awkward. */
6052 gimple_stmt_iterator si;
6053 gimple *s;
6054 if (def_seq)
6055 {
6056 si = gsi_start (def_seq);
6057 s = gsi_stmt (si);
6058 gsi_next (&si);
6059 }
6060 else
6061 {
6062 si = gsi_none ();
6063 s = pattern_stmt;
6064 }
6065 do
6066 {
6067 bool found = false;
6068 if (gimple_extract_op (s, &op))
6069 for (unsigned i = 0; i < op.num_ops; ++i)
6070 if (op.ops[i] == lookfor)
6071 {
6072 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
6073 lookfor = gimple_get_lhs (s);
6074 found = true;
6075 break;
6076 }
6077 if (s == pattern_stmt)
6078 {
6079 if (!found && dump_enabled_p ())
6080 dump_printf_loc (MSG_NOTE, vect_location,
6081 "failed to update reduction index.\n");
6082 break;
6083 }
6084 if (gsi_end_p (si))
6085 s = pattern_stmt;
6086 else
6087 {
6088 s = gsi_stmt (si);
6089 if (s == pattern_stmt)
6090 /* Found the end inside a bigger pattern def seq. */
6091 si = gsi_none ();
6092 else
6093 gsi_next (&si);
6094 }
6095 } while (1);
6096 }
6097 }
6098
6099 /* Function vect_pattern_recog_1
6100
6101 Input:
6102 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
6103 computation pattern.
6104 STMT_INFO: A stmt from which the pattern search should start.
6105
6106 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
6107 a sequence of statements that has the same functionality and can be
6108 used to replace STMT_INFO. It returns the last statement in the sequence
6109 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
6110 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
6111 statement, having first checked that the target supports the new operation
6112 in that type.
6113
6114 This function also does some bookkeeping, as explained in the documentation
6115    for vect_pattern_recog. */
6116
6117 static void
6118 vect_pattern_recog_1 (vec_info *vinfo,
6119 vect_recog_func *recog_func, stmt_vec_info stmt_info)
6120 {
6121 gimple *pattern_stmt;
6122 loop_vec_info loop_vinfo;
6123 tree pattern_vectype;
6124
6125 /* If this statement has already been replaced with pattern statements,
6126 leave the original statement alone, since the first match wins.
6127 Instead try to match against the definition statements that feed
6128 the main pattern statement. */
6129 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
6130 {
6131 gimple_stmt_iterator gsi;
6132 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
6133 !gsi_end_p (gsi); gsi_next (&gsi))
6134 vect_pattern_recog_1 (vinfo, recog_func,
6135 vinfo->lookup_stmt (gsi_stmt (gsi)));
6136 return;
6137 }
6138
6139 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
6140 pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
6141 if (!pattern_stmt)
6142 {
6143 /* Clear any half-formed pattern definition sequence. */
6144 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
6145 return;
6146 }
6147
6148 loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6149
6150 /* Found a vectorizable pattern. */
6151 if (dump_enabled_p ())
6152 dump_printf_loc (MSG_NOTE, vect_location,
6153 "%s pattern recognized: %G",
6154 recog_func->name, pattern_stmt);
6155
6156 /* Mark the stmts that are involved in the pattern. */
6157 vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
6158
6159 /* Patterns cannot be vectorized using SLP, because they change the order of
6160 computation. */
6161 if (loop_vinfo)
6162 {
6163 unsigned ix, ix2;
6164 stmt_vec_info *elem_ptr;
6165 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
6166 elem_ptr, *elem_ptr == stmt_info);
6167 }
6168 }
6169
6170
6171 /* Function vect_pattern_recog
6172
6173 Input:
6174 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
6175 computation idioms.
6176
6177 Output - for each computation idiom that is detected we create a new stmt
6178 that provides the same functionality and that can be vectorized. We
6179 also record some information in the struct_stmt_info of the relevant
6180 stmts, as explained below:
6181
6182 At the entry to this function we have the following stmts, with the
6183 following initial value in the STMT_VINFO fields:
6184
6185 stmt in_pattern_p related_stmt vec_stmt
6186 S1: a_i = .... - - -
6187 S2: a_2 = ..use(a_i).. - - -
6188 S3: a_1 = ..use(a_2).. - - -
6189 S4: a_0 = ..use(a_1).. - - -
6190 S5: ... = ..use(a_0).. - - -
6191
6192 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
6193 represented by a single stmt. We then:
6194 - create a new stmt S6 equivalent to the pattern (the stmt is not
6195 inserted into the code)
6196 - fill in the STMT_VINFO fields as follows:
6197
6198 in_pattern_p related_stmt vec_stmt
6199 S1: a_i = .... - - -
6200 S2: a_2 = ..use(a_i).. - - -
6201 S3: a_1 = ..use(a_2).. - - -
6202 S4: a_0 = ..use(a_1).. true S6 -
6203 '---> S6: a_new = .... - S4 -
6204 S5: ... = ..use(a_0).. - - -
6205
6206 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
6207 to each other through the RELATED_STMT field).
6208
6209 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
6210 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
6211 remain irrelevant unless used by stmts other than S4.
6212
6213 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
6214 (because they are marked as irrelevant). It will vectorize S6, and record
6215 a pointer to the new vector stmt VS6 from S6 (as usual).
6216 S4 will be skipped, and S5 will be vectorized as usual:
6217
6218 in_pattern_p related_stmt vec_stmt
6219 S1: a_i = .... - - -
6220 S2: a_2 = ..use(a_i).. - - -
6221 S3: a_1 = ..use(a_2).. - - -
6222 > VS6: va_new = .... - - -
6223 S4: a_0 = ..use(a_1).. true S6 VS6
6224 '---> S6: a_new = .... - S4 VS6
6225 > VS5: ... = ..vuse(va_new).. - - -
6226 S5: ... = ..use(a_0).. - - -
6227
6228 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
6229 elsewhere), and we'll end up with:
6230
6231 VS6: va_new = ....
6232 VS5: ... = ..vuse(va_new)..
6233
6234 In case of more than one pattern statements, e.g., widen-mult with
6235 intermediate type:
6236
6237 S1 a_t = ;
6238 S2 a_T = (TYPE) a_t;
6239 '--> S3: a_it = (interm_type) a_t;
6240 S4 prod_T = a_T * CONST;
6241 '--> S5: prod_T' = a_it w* CONST;
6242
6243 there may be other users of a_T outside the pattern. In that case S2 will
6244 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
6245 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
6246 be recorded in S3. */
6247
6248 void
6249 vect_pattern_recog (vec_info *vinfo)
6250 {
6251 class loop *loop;
6252 basic_block *bbs;
6253 unsigned int nbbs;
6254 gimple_stmt_iterator si;
6255 unsigned int i, j;
6256
6257 vect_determine_precisions (vinfo);
6258
6259 DUMP_VECT_SCOPE ("vect_pattern_recog");
6260
6261 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
6262 {
6263 loop = LOOP_VINFO_LOOP (loop_vinfo);
6264 bbs = LOOP_VINFO_BBS (loop_vinfo);
6265 nbbs = loop->num_nodes;
6266
6267 /* Scan through the loop stmts, applying the pattern recognition
6268 functions starting at each stmt visited: */
6269 for (i = 0; i < nbbs; i++)
6270 {
6271 basic_block bb = bbs[i];
6272 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
6273 {
6274 if (is_gimple_debug (gsi_stmt (si)))
6275 continue;
6276 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
6277 /* Scan over all generic vect_recog_xxx_pattern functions. */
6278 for (j = 0; j < NUM_PATTERNS; j++)
6279 vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
6280 stmt_info);
6281 }
6282 }
6283 }
6284 else
6285 {
6286 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
6287 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
6288 for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
6289 !gsi_end_p (gsi); gsi_next (&gsi))
6290 {
6291 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
6292 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
6293 continue;
6294
6295 /* Scan over all generic vect_recog_xxx_pattern functions. */
6296 for (j = 0; j < NUM_PATTERNS; j++)
6297 vect_pattern_recog_1 (vinfo,
6298 &vect_vect_recog_func_ptrs[j], stmt_info);
6299 }
6300 }
6301
6302 /* After this no more add_stmt calls are allowed. */
6303 vinfo->stmt_vec_info_ro = true;
6304 }