gcc/tree-vect-patterns.c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
49 #include "tree-vector-builder.h"
50 #include "vec-perm-indices.h"
51
52 /* Return true if we have a useful VR_RANGE range for VAR, storing it
53 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
54
55 static bool
56 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
57 {
58 value_range_kind vr_type = get_range_info (var, min_value, max_value);
59 wide_int nonzero = get_nonzero_bits (var);
60 signop sgn = TYPE_SIGN (TREE_TYPE (var));
61 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
62 nonzero, sgn) == VR_RANGE)
63 {
64 if (dump_enabled_p ())
65 {
66 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
67 dump_printf (MSG_NOTE, " has range [");
68 dump_hex (MSG_NOTE, *min_value);
69 dump_printf (MSG_NOTE, ", ");
70 dump_hex (MSG_NOTE, *max_value);
71 dump_printf (MSG_NOTE, "]\n");
72 }
73 return true;
74 }
75 else
76 {
77 if (dump_enabled_p ())
78 {
79 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
80 dump_printf (MSG_NOTE, " has no range info\n");
81 }
82 return false;
83 }
84 }
85
86 /* Report that we've found an instance of the pattern called NAME in
87 statement STMT. */
88
89 static void
90 vect_pattern_detected (const char *name, gimple *stmt)
91 {
92 if (dump_enabled_p ())
93 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
94 }
95
96 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
97 return the pattern statement's stmt_vec_info. Set its vector type to
98 VECTYPE if it doesn't have one already. */
99
100 static stmt_vec_info
101 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
102 stmt_vec_info orig_stmt_info, tree vectype)
103 {
104 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
105 if (pattern_stmt_info == NULL)
106 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
107 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
108
109 pattern_stmt_info->pattern_stmt_p = true;
110 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
111 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
112 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
113 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
114 {
115 gcc_assert (VECTOR_BOOLEAN_TYPE_P (vectype)
116 == vect_use_mask_type_p (orig_stmt_info));
117 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
118 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
119 }
120 return pattern_stmt_info;
121 }
122
123 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
124 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
125 have one already. */
126
127 static void
128 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
129 stmt_vec_info orig_stmt_info, tree vectype)
130 {
131 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
132 STMT_VINFO_RELATED_STMT (orig_stmt_info)
133 = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
134 }
135
136 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
137 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
138 be different from the vector type of the final pattern statement.
139 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
140 from which it was derived. */
141
142 static inline void
143 append_pattern_def_seq (vec_info *vinfo,
144 stmt_vec_info stmt_info, gimple *new_stmt,
145 tree vectype = NULL_TREE,
146 tree scalar_type_for_mask = NULL_TREE)
147 {
148 gcc_assert (!scalar_type_for_mask
149 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
150 if (vectype)
151 {
152 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
153 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
154 if (scalar_type_for_mask)
155 new_stmt_info->mask_precision
156 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
157 }
158 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
159 new_stmt);
160 }
161
162 /* The caller wants to perform new operations on vect_external variable
163 VAR, so that the result of the operations would also be vect_external.
164 Return the edge on which the operations can be performed, if one exists.
165 Return null if the operations should instead be treated as part of
166 the pattern that needs them. */
167
168 static edge
169 vect_get_external_def_edge (vec_info *vinfo, tree var)
170 {
171 edge e = NULL;
172 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
173 {
174 e = loop_preheader_edge (loop_vinfo->loop);
175 if (!SSA_NAME_IS_DEFAULT_DEF (var))
176 {
177 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
178 if (bb == NULL
179 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
180 e = NULL;
181 }
182 }
183 return e;
184 }
185
186 /* Return true if the target supports a vector version of CODE,
187 where CODE is known to map to a direct optab. ITYPE specifies
188 the type of (some of) the scalar inputs and OTYPE specifies the
189 type of the scalar result.
190
191 If CODE allows the inputs and outputs to have different type
192 (such as for WIDEN_SUM_EXPR), it is the input mode rather
193 than the output mode that determines the appropriate target pattern.
194 Operand 0 of the target pattern then specifies the mode that the output
195 must have.
196
197 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
198 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
199 is nonnull. */
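/* As a concrete illustration (assuming 128-bit vectors and 16-bit shorts,
   which this function does not itself guarantee): for WIDEN_SUM_EXPR with
   ITYPE "short" and OTYPE "int", the optab handler is looked up using the
   V8HImode input vector, and the match only succeeds if operand 0 of that
   instruction has the V4SImode output vector mode.  */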
200
201 static bool
202 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
203 tree itype, tree *vecotype_out,
204 tree *vecitype_out = NULL)
205 {
206 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
207 if (!vecitype)
208 return false;
209
210 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
211 if (!vecotype)
212 return false;
213
214 optab optab = optab_for_tree_code (code, vecitype, optab_default);
215 if (!optab)
216 return false;
217
218 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
219 if (icode == CODE_FOR_nothing
220 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
221 return false;
222
223 *vecotype_out = vecotype;
224 if (vecitype_out)
225 *vecitype_out = vecitype;
226 return true;
227 }
228
229 /* Round bit precision PRECISION up to a full element. */
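/* For instance, assuming the usual 8-bit BITS_PER_UNIT, a precision of 17
   is rounded up to 32, and a precision of 3 is rounded up to 8.  */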
230
231 static unsigned int
232 vect_element_precision (unsigned int precision)
233 {
234 precision = 1 << ceil_log2 (precision);
235 return MAX (precision, BITS_PER_UNIT);
236 }
237
238 /* If OP is defined by a statement that's being considered for vectorization,
239 return information about that statement, otherwise return NULL. */
240
241 static stmt_vec_info
242 vect_get_internal_def (vec_info *vinfo, tree op)
243 {
244 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
245 if (def_stmt_info
246 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
247 return def_stmt_info;
248 return NULL;
249 }
250
251 /* Check whether NAME, an ssa-name used in STMT_VINFO,
252 is a result of a type promotion, such that:
253 DEF_STMT: NAME = NOP (name0)
254 If CHECK_SIGN is TRUE, check that either both types are signed or both are
255 unsigned. */
256
257 static bool
258 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
259 tree *orig_type, gimple **def_stmt, bool *promotion)
260 {
261 tree type = TREE_TYPE (name);
262 tree oprnd0;
263 enum vect_def_type dt;
264
265 stmt_vec_info def_stmt_info;
266 if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
267 return false;
268
269 if (dt != vect_internal_def
270 && dt != vect_external_def && dt != vect_constant_def)
271 return false;
272
273 if (!*def_stmt)
274 return false;
275
276 if (!is_gimple_assign (*def_stmt))
277 return false;
278
279 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
280 return false;
281
282 oprnd0 = gimple_assign_rhs1 (*def_stmt);
283
284 *orig_type = TREE_TYPE (oprnd0);
285 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
286 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
287 return false;
288
289 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
290 *promotion = true;
291 else
292 *promotion = false;
293
294 if (!vect_is_simple_use (oprnd0, vinfo, &dt))
295 return false;
296
297 return true;
298 }
299
300 /* Holds information about an input operand after some sign changes
301 and type promotions have been peeled away. */
302 class vect_unpromoted_value {
303 public:
304 vect_unpromoted_value ();
305
306 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
307
308 /* The value obtained after peeling away zero or more casts. */
309 tree op;
310
311 /* The type of OP. */
312 tree type;
313
314 /* The definition type of OP. */
315 vect_def_type dt;
316
317 /* If OP is the result of peeling at least one cast, and if the cast
318 of OP itself is a vectorizable statement, CASTER identifies that
319 statement, otherwise it is null. */
320 stmt_vec_info caster;
321 };
322
323 inline vect_unpromoted_value::vect_unpromoted_value ()
324 : op (NULL_TREE),
325 type (NULL_TREE),
326 dt (vect_uninitialized_def),
327 caster (NULL)
328 {
329 }
330
331 /* Set the operand to OP_IN, its definition type to DT_IN, and the
332 statement that casts it to CASTER_IN. */
333
334 inline void
335 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
336 stmt_vec_info caster_in)
337 {
338 op = op_in;
339 type = TREE_TYPE (op);
340 dt = dt_in;
341 caster = caster_in;
342 }
343
344 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
345 to reach some vectorizable inner operand OP', continuing as long as it
346 is possible to convert OP' back to OP using a possible sign change
347 followed by a possible promotion P. Return this OP', or null if OP is
348 not a vectorizable SSA name. If there is a promotion P, describe its
349 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
350 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
351 have more than one user.
352
353 A successful return means that it is possible to go from OP' to OP
354 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
355 whereas the cast from UNPROM to OP might be a promotion, a sign
356 change, or a nop.
357
358 E.g. say we have:
359
360 signed short *ptr = ...;
361 signed short C = *ptr;
362 unsigned short B = (unsigned short) C; // sign change
363 signed int A = (signed int) B; // unsigned promotion
364 ...possible other uses of A...
365 unsigned int OP = (unsigned int) A; // sign change
366
367 In this case it's possible to go directly from C to OP using:
368
369 OP = (unsigned int) (unsigned short) C;
370 +------------+ +--------------+
371 promotion sign change
372
373 so OP' would be C. The input to the promotion is B, so UNPROM
374 would describe B. */
375
376 static tree
377 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
378 vect_unpromoted_value *unprom,
379 bool *single_use_p = NULL)
380 {
381 tree res = NULL_TREE;
382 tree op_type = TREE_TYPE (op);
383 unsigned int orig_precision = TYPE_PRECISION (op_type);
384 unsigned int min_precision = orig_precision;
385 stmt_vec_info caster = NULL;
386 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
387 {
388 /* See whether OP is simple enough to vectorize. */
389 stmt_vec_info def_stmt_info;
390 gimple *def_stmt;
391 vect_def_type dt;
392 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
393 break;
394
395 /* If OP is the input of a demotion, skip over it to see whether
396 OP is itself the result of a promotion. If so, the combined
397 effect of the promotion and the demotion might fit the required
398 pattern, otherwise neither operation fits.
399
400 This copes with cases such as the result of an arithmetic
401 operation being truncated before being stored, and where that
402 arithmetic operation has been recognized as an over-widened one. */
403 if (TYPE_PRECISION (op_type) <= min_precision)
404 {
405 /* Use OP as the UNPROM described above if we haven't yet
406 found a promotion, or if using the new input preserves the
407 sign of the previous promotion. */
408 if (!res
409 || TYPE_PRECISION (unprom->type) == orig_precision
410 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
411 {
412 unprom->set_op (op, dt, caster);
413 min_precision = TYPE_PRECISION (op_type);
414 }
415 /* Stop if we've already seen a promotion and if this
416 conversion does more than change the sign. */
417 else if (TYPE_PRECISION (op_type)
418 != TYPE_PRECISION (unprom->type))
419 break;
420
421 /* The sequence now extends to OP. */
422 res = op;
423 }
424
425 /* See whether OP is defined by a cast. Record it as CASTER if
426 the cast is potentially vectorizable. */
427 if (!def_stmt)
428 break;
429 caster = def_stmt_info;
430
431 /* Ignore pattern statements, since we don't link uses for them. */
432 if (caster
433 && single_use_p
434 && !STMT_VINFO_RELATED_STMT (caster)
435 && !has_single_use (res))
436 *single_use_p = false;
437
438 gassign *assign = dyn_cast <gassign *> (def_stmt);
439 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
440 break;
441
442 /* Continue with the input to the cast. */
443 op = gimple_assign_rhs1 (def_stmt);
444 op_type = TREE_TYPE (op);
445 }
446 return res;
447 }
448
449 /* OP is an integer operand to an operation that returns TYPE, and we
450 want to treat the operation as a widening one. So far we can treat
451 it as widening from *COMMON_TYPE.
452
453 Return true if OP is suitable for such a widening operation,
454 either widening from *COMMON_TYPE or from some supertype of it.
455 Update *COMMON_TYPE to the supertype in the latter case.
456
457 SHIFT_P is true if OP is a shift amount. */
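/* For instance (an illustration assuming 32-bit int): with TYPE == int,
   a constant shift amount of 12 is acceptable while 20 is not, since the
   amount may not exceed half of TYPE's precision; a non-shift constant is
   accepted only if its minimum precision fits in half of TYPE.  */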
458
459 static bool
460 vect_joust_widened_integer (tree type, bool shift_p, tree op,
461 tree *common_type)
462 {
463 /* Calculate the minimum precision required by OP, without changing
464 the sign of either operand. */
465 unsigned int precision;
466 if (shift_p)
467 {
468 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
469 return false;
470 precision = TREE_INT_CST_LOW (op);
471 }
472 else
473 {
474 precision = wi::min_precision (wi::to_widest (op),
475 TYPE_SIGN (*common_type));
476 if (precision * 2 > TYPE_PRECISION (type))
477 return false;
478 }
479
480 /* If OP requires a wider type, switch to that type. The checks
481 above ensure that this is still narrower than the result. */
482 precision = vect_element_precision (precision);
483 if (TYPE_PRECISION (*common_type) < precision)
484 *common_type = build_nonstandard_integer_type
485 (precision, TYPE_UNSIGNED (*common_type));
486 return true;
487 }
488
489 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
490 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
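/* A worked example (assuming 16-bit shorts, purely for illustration): if
   *COMMON_TYPE is "unsigned short" and NEW_TYPE is "short", neither type
   can hold all values of the other, so a 32-bit signed common type is
   chosen, and that choice is only accepted when TYPE has at least 64 bits
   of precision.  */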
491
492 static bool
493 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
494 {
495 if (types_compatible_p (*common_type, new_type))
496 return true;
497
498 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
499 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
500 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
501 return true;
502
503 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
504 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
505 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
506 {
507 *common_type = new_type;
508 return true;
509 }
510
511 /* We have mismatched signs, with the signed type being
512 no wider than the unsigned type. In this case we need
513 a wider signed type. */
514 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
515 TYPE_PRECISION (new_type));
516 precision *= 2;
517 if (precision * 2 > TYPE_PRECISION (type))
518 return false;
519
520 *common_type = build_nonstandard_integer_type (precision, false);
521 return true;
522 }
523
524 /* Check whether STMT_INFO can be viewed as a tree of integer operations
525 in which each node either performs CODE or WIDENED_CODE, and where
526 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
527 specifies the maximum number of leaf operands. SHIFT_P says whether
528 CODE and WIDENED_CODE are some sort of shift.
529
530 If STMT_INFO is such a tree, return the number of leaf operands
531 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
532 to a type that (a) is narrower than the result of STMT_INFO and
533 (b) can hold all leaf operand values.
534
535 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
536 exists. */
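/* For example (mirroring how the dot-product pattern below uses this
   function, and assuming A and B are themselves vectorizable): for
   "prod = (int) a * (int) b" with unsigned char A and B, CODE == MULT_EXPR
   and WIDENED_CODE == WIDEN_MULT_EXPR, the function returns 2, describes
   A and B in UNPROM[0] and UNPROM[1], and sets *COMMON_TYPE to
   "unsigned char".  */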
537
538 static unsigned int
539 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
540 tree_code widened_code, bool shift_p,
541 unsigned int max_nops,
542 vect_unpromoted_value *unprom, tree *common_type)
543 {
544 /* Check for an integer operation with the right code. */
545 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
546 if (!assign)
547 return 0;
548
549 tree_code rhs_code = gimple_assign_rhs_code (assign);
550 if (rhs_code != code && rhs_code != widened_code)
551 return 0;
552
553 tree type = gimple_expr_type (assign);
554 if (!INTEGRAL_TYPE_P (type))
555 return 0;
556
557 /* Assume that both operands will be leaf operands. */
558 max_nops -= 2;
559
560 /* Check the operands. */
561 unsigned int next_op = 0;
562 for (unsigned int i = 0; i < 2; ++i)
563 {
564 vect_unpromoted_value *this_unprom = &unprom[next_op];
565 unsigned int nops = 1;
566 tree op = gimple_op (assign, i + 1);
567 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
568 {
569 /* We already have a common type from earlier operands.
570 Update it to account for OP. */
571 this_unprom->set_op (op, vect_constant_def);
572 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
573 return 0;
574 }
575 else
576 {
577 /* Only allow shifts by constants. */
578 if (shift_p && i == 1)
579 return 0;
580
581 if (!vect_look_through_possible_promotion (vinfo, op, this_unprom))
582 return 0;
583
584 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
585 {
586 /* The operand isn't widened. If STMT_INFO has the code
587 for an unwidened operation, recursively check whether
588 this operand is a node of the tree. */
589 if (rhs_code != code
590 || max_nops == 0
591 || this_unprom->dt != vect_internal_def)
592 return 0;
593
594 /* Give back the leaf slot allocated above now that we're
595 not treating this as a leaf operand. */
596 max_nops += 1;
597
598 /* Recursively process the definition of the operand. */
599 stmt_vec_info def_stmt_info
600 = vinfo->lookup_def (this_unprom->op);
601 nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
602 widened_code, shift_p, max_nops,
603 this_unprom, common_type);
604 if (nops == 0)
605 return 0;
606
607 max_nops -= nops;
608 }
609 else
610 {
611 /* Make sure that the operand is narrower than the result. */
612 if (TYPE_PRECISION (this_unprom->type) * 2
613 > TYPE_PRECISION (type))
614 return 0;
615
616 /* Update COMMON_TYPE for the new operand. */
617 if (i == 0)
618 *common_type = this_unprom->type;
619 else if (!vect_joust_widened_type (type, this_unprom->type,
620 common_type))
621 return 0;
622 }
623 }
624 next_op += nops;
625 }
626 return next_op;
627 }
628
629 /* Helper to return a new SSA temporary of type TYPE for use in a pattern, with STMT as its defining statement. If STMT
630 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
631
632 static tree
633 vect_recog_temp_ssa_var (tree type, gimple *stmt)
634 {
635 return make_temp_ssa_name (type, stmt, "patt");
636 }
637
638 /* STMT2_INFO describes a type conversion that could be split into STMT1
639 followed by a version of STMT2_INFO that takes NEW_RHS as its first
640 input. Try to do this using pattern statements, returning true on
641 success. */
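/* For instance (an illustrative use, as in vect_convert_input below):
   a conversion "int x = (int) c" of a char C can be split into
   "short tmp = (short) c" as STMT1 and "int x = (int) tmp", where TMP is
   passed as NEW_RHS.  */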
642
643 static bool
644 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
645 gimple *stmt1, tree vectype)
646 {
647 if (is_pattern_stmt_p (stmt2_info))
648 {
649 /* STMT2_INFO is part of a pattern. Get the statement to which
650 the pattern is attached. */
651 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
652 vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
653
654 if (dump_enabled_p ())
655 dump_printf_loc (MSG_NOTE, vect_location,
656 "Splitting pattern statement: %G", stmt2_info->stmt);
657
658 /* Since STMT2_INFO is a pattern statement, we can change it
659 in-situ without worrying about changing the code for the
660 containing block. */
661 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
662
663 if (dump_enabled_p ())
664 {
665 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
666 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
667 stmt2_info->stmt);
668 }
669
670 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
671 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
672 /* STMT2_INFO is the actual pattern statement. Add STMT1
673 to the end of the definition sequence. */
674 gimple_seq_add_stmt_without_update (def_seq, stmt1);
675 else
676 {
677 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
678 before it. */
679 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
680 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
681 }
682 return true;
683 }
684 else
685 {
686 /* STMT2_INFO doesn't yet have a pattern. Try to create a
687 two-statement pattern now. */
688 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
689 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
690 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
691 if (!lhs_vectype)
692 return false;
693
694 if (dump_enabled_p ())
695 dump_printf_loc (MSG_NOTE, vect_location,
696 "Splitting statement: %G", stmt2_info->stmt);
697
698 /* Add STMT1 as a singleton pattern definition sequence. */
699 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
700 vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
701 gimple_seq_add_stmt_without_update (def_seq, stmt1);
702
703 /* Build the second of the two pattern statements. */
704 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
705 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
706 vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
707
708 if (dump_enabled_p ())
709 {
710 dump_printf_loc (MSG_NOTE, vect_location,
711 "into pattern statements: %G", stmt1);
712 dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
713 }
714
715 return true;
716 }
717 }
718
719 /* Convert UNPROM to TYPE and return the result, adding new statements
720 to STMT_INFO's pattern definition statements if no better way is
721 available. VECTYPE is the vector form of TYPE. */
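/* For example (illustrative only): if UNPROM describes a char value whose
   existing cast widens it directly to int but TYPE is "short", the cast
   can be split into char -> short -> int so that the short intermediate
   result feeds the pattern, rather than emitting a second conversion of
   the original char.  */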
722
723 static tree
724 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
725 vect_unpromoted_value *unprom, tree vectype)
726 {
727 /* Check for a no-op conversion. */
728 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
729 return unprom->op;
730
731 /* Allow the caller to create constant vect_unpromoted_values. */
732 if (TREE_CODE (unprom->op) == INTEGER_CST)
733 return wide_int_to_tree (type, wi::to_widest (unprom->op));
734
735 tree input = unprom->op;
736 if (unprom->caster)
737 {
738 tree lhs = gimple_get_lhs (unprom->caster->stmt);
739 tree lhs_type = TREE_TYPE (lhs);
740
741 /* If the result of the existing cast is the right width, use it
742 instead of the source of the cast. */
743 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
744 input = lhs;
745 /* If the precision we want is between the source and result
746 precisions of the existing cast, try splitting the cast into
747 two and tapping into a mid-way point. */
748 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
749 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
750 {
751 /* In order to preserve the semantics of the original cast,
752 give the mid-way point the same signedness as the input value.
753
754 It would be possible to use a signed type here instead if
755 TYPE is signed and UNPROM->TYPE is unsigned, but that would
756 make the sign of the midtype sensitive to the order in
757 which we process the statements, since the signedness of
758 TYPE is the signedness required by just one of possibly
759 many users. Also, unsigned promotions are usually as cheap
760 as or cheaper than signed ones, so it's better to keep an
761 unsigned promotion. */
762 tree midtype = build_nonstandard_integer_type
763 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
764 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
765 if (vec_midtype)
766 {
767 input = vect_recog_temp_ssa_var (midtype, NULL);
768 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
769 unprom->op);
770 if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
771 vec_midtype))
772 append_pattern_def_seq (vinfo, stmt_info,
773 new_stmt, vec_midtype);
774 }
775 }
776
777 /* See if we can reuse an existing result. */
778 if (types_compatible_p (type, TREE_TYPE (input)))
779 return input;
780 }
781
782 /* We need a new conversion statement. */
783 tree new_op = vect_recog_temp_ssa_var (type, NULL);
784 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
785
786 /* If OP is an external value, see if we can insert the new statement
787 on an incoming edge. */
788 if (input == unprom->op && unprom->dt == vect_external_def)
789 if (edge e = vect_get_external_def_edge (vinfo, input))
790 {
791 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
792 gcc_assert (!new_bb);
793 return new_op;
794 }
795
796 /* As a (common) last resort, add the statement to the pattern itself. */
797 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
798 return new_op;
799 }
800
801 /* Invoke vect_convert_input for N elements of UNPROM and store the
802 result in the corresponding elements of RESULT. */
803
804 static void
805 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
806 tree *result, tree type, vect_unpromoted_value *unprom,
807 tree vectype)
808 {
809 for (unsigned int i = 0; i < n; ++i)
810 {
811 unsigned int j;
812 for (j = 0; j < i; ++j)
813 if (unprom[j].op == unprom[i].op)
814 break;
815 if (j < i)
816 result[i] = result[j];
817 else
818 result[i] = vect_convert_input (vinfo, stmt_info,
819 type, &unprom[i], vectype);
820 }
821 }
822
823 /* The caller has created a (possibly empty) sequence of pattern definition
824 statements followed by a single statement PATTERN_STMT. Cast the result
825 of this final statement to TYPE. If a new statement is needed, add
826 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
827 and return the new statement, otherwise return PATTERN_STMT as-is.
828 VECITYPE is the vector form of PATTERN_STMT's result type. */
829
830 static gimple *
831 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
832 gimple *pattern_stmt, tree vecitype)
833 {
834 tree lhs = gimple_get_lhs (pattern_stmt);
835 if (!types_compatible_p (type, TREE_TYPE (lhs)))
836 {
837 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
838 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
839 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
840 }
841 return pattern_stmt;
842 }
843
844 /* Return true if STMT_VINFO describes a reduction for which reassociation
845 is allowed. If STMT_INFO is part of a group, assume that it's part of
846 a reduction chain and optimistically assume that all statements
847 except the last allow reassociation.
848 Also require it to have code CODE and to be a reduction
849 in the outermost loop. When returning true, store the operands in
850 *OP0_OUT and *OP1_OUT. */
851
852 static bool
853 vect_reassociating_reduction_p (vec_info *vinfo,
854 stmt_vec_info stmt_info, tree_code code,
855 tree *op0_out, tree *op1_out)
856 {
857 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
858 if (!loop_info)
859 return false;
860
861 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
862 if (!assign || gimple_assign_rhs_code (assign) != code)
863 return false;
864
865 /* We don't allow changing the order of the computation in the inner-loop
866 when doing outer-loop vectorization. */
867 class loop *loop = LOOP_VINFO_LOOP (loop_info);
868 if (loop && nested_in_vect_loop_p (loop, stmt_info))
869 return false;
870
871 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
872 {
873 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
874 code))
875 return false;
876 }
877 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
878 return false;
879
880 *op0_out = gimple_assign_rhs1 (assign);
881 *op1_out = gimple_assign_rhs2 (assign);
882 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
883 std::swap (*op0_out, *op1_out);
884 return true;
885 }
886
887 /* Function vect_recog_dot_prod_pattern
888
889 Try to find the following pattern:
890
891 type x_t, y_t;
892 TYPE1 prod;
893 TYPE2 sum = init;
894 loop:
895 sum_0 = phi <init, sum_1>
896 S1 x_t = ...
897 S2 y_t = ...
898 S3 x_T = (TYPE1) x_t;
899 S4 y_T = (TYPE1) y_t;
900 S5 prod = x_T * y_T;
901 [S6 prod = (TYPE2) prod; #optional]
902 S7 sum_1 = prod + sum_0;
903
904 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
905 same size as 'TYPE1' or bigger. This is a special case of a reduction
906 computation.
907
908 Input:
909
910 * STMT_VINFO: The stmt from which the pattern search begins. In the
911 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
912 will be detected.
913
914 Output:
915
916 * TYPE_OUT: The type of the output of this pattern.
917
918 * Return value: A new stmt that will be used to replace the sequence of
919 stmts that constitute the pattern. In this case it will be:
920 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
921
922 Note: The dot-prod idiom is a widening reduction pattern that is
923 vectorized without preserving all the intermediate results. It
924 produces only N/2 (widened) results (by summing up pairs of
925 intermediate results) rather than all N results. Therefore, we
926 cannot allow this pattern when we want to get all the results and in
927 the correct order (as is the case when this computation is in an
928 inner-loop nested in an outer-loop that is being vectorized). */
929
930 static gimple *
931 vect_recog_dot_prod_pattern (vec_info *vinfo,
932 stmt_vec_info stmt_vinfo, tree *type_out)
933 {
934 tree oprnd0, oprnd1;
935 gimple *last_stmt = stmt_vinfo->stmt;
936 tree type, half_type;
937 gimple *pattern_stmt;
938 tree var;
939
940 /* Look for the following pattern
941 DX = (TYPE1) X;
942 DY = (TYPE1) Y;
943 DPROD = DX * DY;
944 DDPROD = (TYPE2) DPROD;
945 sum_1 = DDPROD + sum_0;
946 In which
947 - DX is double the size of X
948 - DY is double the size of Y
949 - DX, DY, DPROD all have the same type
950 - sum is the same size as DPROD or bigger
951 - sum has been recognized as a reduction variable.
952
953 This is equivalent to:
954 DPROD = X w* Y; #widen mult
955 sum_1 = DPROD w+ sum_0; #widen summation
956 or
957 DPROD = X w* Y; #widen mult
958 sum_1 = DPROD + sum_0; #summation
959 */
960
961 /* Starting from LAST_STMT, follow the defs of its uses in search
962 of the above pattern. */
963
964 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
965 &oprnd0, &oprnd1))
966 return NULL;
967
968 type = gimple_expr_type (last_stmt);
969
970 vect_unpromoted_value unprom_mult;
971 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
972
973 /* So far so good. Since last_stmt was detected as a (summation) reduction,
974 we know that oprnd1 is the reduction variable (defined by a loop-header
975 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
976 It remains to check that oprnd0 is defined by a (widen_)mult_expr. */
977 if (!oprnd0)
978 return NULL;
979
980 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
981 if (!mult_vinfo)
982 return NULL;
983
984 /* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
985 inside the loop (in case we are analyzing an outer-loop). */
986 vect_unpromoted_value unprom0[2];
987 if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
988 false, 2, unprom0, &half_type))
989 return NULL;
990
991 /* If there are two widening operations, make sure they agree on
992 the sign of the extension. */
993 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
994 && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
995 return NULL;
996
997 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
998
999 tree half_vectype;
1000 if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1001 type_out, &half_vectype))
1002 return NULL;
1003
1004 /* Get the inputs in the appropriate types. */
1005 tree mult_oprnd[2];
1006 vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1007 unprom0, half_vectype);
1008
1009 var = vect_recog_temp_ssa_var (type, NULL);
1010 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1011 mult_oprnd[0], mult_oprnd[1], oprnd1);
1012
1013 return pattern_stmt;
1014 }
1015
1016
1017 /* Function vect_recog_sad_pattern
1018
1019 Try to find the following Sum of Absolute Difference (SAD) pattern:
1020
1021 type x_t, y_t;
1022 signed TYPE1 diff, abs_diff;
1023 TYPE2 sum = init;
1024 loop:
1025 sum_0 = phi <init, sum_1>
1026 S1 x_t = ...
1027 S2 y_t = ...
1028 S3 x_T = (TYPE1) x_t;
1029 S4 y_T = (TYPE1) y_t;
1030 S5 diff = x_T - y_T;
1031 S6 abs_diff = ABS_EXPR <diff>;
1032 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1033 S8 sum_1 = abs_diff + sum_0;
1034
1035 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1036 same size as 'TYPE1' or bigger. This is a special case of a reduction
1037 computation.
1038
1039 Input:
1040
1041 * STMT_VINFO: The stmt from which the pattern search begins. In the
1042 example, when this function is called with S8, the pattern
1043 {S3,S4,S5,S6,S7,S8} will be detected.
1044
1045 Output:
1046
1047 * TYPE_OUT: The type of the output of this pattern.
1048
1049 * Return value: A new stmt that will be used to replace the sequence of
1050 stmts that constitute the pattern. In this case it will be:
1051 SAD_EXPR <x_t, y_t, sum_0>
1052 */
1053
1054 static gimple *
1055 vect_recog_sad_pattern (vec_info *vinfo,
1056 stmt_vec_info stmt_vinfo, tree *type_out)
1057 {
1058 gimple *last_stmt = stmt_vinfo->stmt;
1059 tree half_type;
1060
1061 /* Look for the following pattern
1062 DX = (TYPE1) X;
1063 DY = (TYPE1) Y;
1064 DDIFF = DX - DY;
1065 DAD = ABS_EXPR <DDIFF>;
1066 DAD = (TYPE2) DAD;
1067 sum_1 = DAD + sum_0;
1068 In which
1069 - DX is at least double the size of X
1070 - DY is at least double the size of Y
1071 - DX, DY, DDIFF, DAD all have the same type
1072 - sum is the same size as DAD or bigger
1073 - sum has been recognized as a reduction variable.
1074
1075 This is equivalent to:
1076 DDIFF = X w- Y; #widen sub
1077 DAD = ABS_EXPR <DDIFF>;
1078 sum_1 = DAD w+ sum_0; #widen summation
1079 or
1080 DDIFF = X w- Y; #widen sub
1081 DAD = ABS_EXPR <DDIFF>;
1082 sum_1 = DAD + sum_0; #summation
1083 */
1084
1085 /* Starting from LAST_STMT, follow the defs of its uses in search
1086 of the above pattern. */
1087
1088 tree plus_oprnd0, plus_oprnd1;
1089 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1090 &plus_oprnd0, &plus_oprnd1))
1091 return NULL;
1092
1093 tree sum_type = gimple_expr_type (last_stmt);
1094
1095 /* Any non-truncating sequence of conversions is OK here, since
1096 with a successful match, the result of the ABS(U) is known to fit
1097 within the nonnegative range of the result type. (It cannot be the
1098 negative of the minimum signed value due to the range of the widening
1099 MINUS_EXPR.) */
1100 vect_unpromoted_value unprom_abs;
1101 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1102 &unprom_abs);
1103
1104 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1105 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1106 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1107 Then check that plus_oprnd0 is defined by an abs_expr. */
1108
1109 if (!plus_oprnd0)
1110 return NULL;
1111
1112 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1113 if (!abs_stmt_vinfo)
1114 return NULL;
1115
1116 /* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
1117 inside the loop (in case we are analyzing an outer-loop). */
1118 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1119 if (!abs_stmt
1120 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1121 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1122 return NULL;
1123
1124 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1125 tree abs_type = TREE_TYPE (abs_oprnd);
1126 if (TYPE_UNSIGNED (abs_type))
1127 return NULL;
1128
1129 /* Peel off conversions from the ABS input. This can involve sign
1130 changes (e.g. from an unsigned subtraction to a signed ABS input)
1131 or signed promotion, but it can't include unsigned promotion.
1132 (Note that ABS of an unsigned promotion should have been folded
1133 away before now anyway.) */
1134 vect_unpromoted_value unprom_diff;
1135 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1136 &unprom_diff);
1137 if (!abs_oprnd)
1138 return NULL;
1139 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1140 && TYPE_UNSIGNED (unprom_diff.type))
1141 return NULL;
1142
1143 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1144 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1145 if (!diff_stmt_vinfo)
1146 return NULL;
1147
1148 /* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
1149 inside the loop (in case we are analyzing an outer-loop). */
1150 vect_unpromoted_value unprom[2];
1151 if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
1152 false, 2, unprom, &half_type))
1153 return NULL;
1154
1155 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1156
1157 tree half_vectype;
1158 if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1159 type_out, &half_vectype))
1160 return NULL;
1161
1162 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1163 tree sad_oprnd[2];
1164 vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1165 unprom, half_vectype);
1166
1167 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1168 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1169 sad_oprnd[1], plus_oprnd1);
1170
1171 return pattern_stmt;
1172 }
1173
1174 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1175 so that it can be treated as though it had the form:
1176
1177 A_TYPE a;
1178 B_TYPE b;
1179 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1180 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1181 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1182 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1183 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1184
1185 Try to replace the pattern with:
1186
1187 A_TYPE a;
1188 B_TYPE b;
1189 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1190 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1191 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1192 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1193
1194 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1195
1196 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1197 name of the pattern being matched, for dump purposes. */
1198
1199 static gimple *
1200 vect_recog_widen_op_pattern (vec_info *vinfo,
1201 stmt_vec_info last_stmt_info, tree *type_out,
1202 tree_code orig_code, tree_code wide_code,
1203 bool shift_p, const char *name)
1204 {
1205 gimple *last_stmt = last_stmt_info->stmt;
1206
1207 vect_unpromoted_value unprom[2];
1208 tree half_type;
1209 if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1210 shift_p, 2, unprom, &half_type))
1211 return NULL;
1212
1213 /* Pattern detected. */
1214 vect_pattern_detected (name, last_stmt);
1215
1216 tree type = gimple_expr_type (last_stmt);
1217 tree itype = type;
1218 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1219 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1220 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1221 TYPE_UNSIGNED (half_type));
1222
1223 /* Check target support */
1224 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1225 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1226 enum tree_code dummy_code;
1227 int dummy_int;
1228 auto_vec<tree> dummy_vec;
1229 if (!vectype
1230 || !vecitype
1231 || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1232 vecitype, vectype,
1233 &dummy_code, &dummy_code,
1234 &dummy_int, &dummy_vec))
1235 return NULL;
1236
1237 *type_out = get_vectype_for_scalar_type (vinfo, type);
1238 if (!*type_out)
1239 return NULL;
1240
1241 tree oprnd[2];
1242 vect_convert_inputs (vinfo, last_stmt_info,
1243 2, oprnd, half_type, unprom, vectype);
1244
1245 tree var = vect_recog_temp_ssa_var (itype, NULL);
1246 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1247 oprnd[0], oprnd[1]);
1248
1249 return vect_convert_output (vinfo, last_stmt_info,
1250 type, pattern_stmt, vecitype);
1251 }
1252
1253 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1254 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1255
1256 static gimple *
1257 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1258 tree *type_out)
1259 {
1260 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1261 MULT_EXPR, WIDEN_MULT_EXPR, false,
1262 "vect_recog_widen_mult_pattern");
1263 }
1264
1265 /* Function vect_recog_pow_pattern
1266
1267 Try to find the following pattern:
1268
1269 x = POW (y, N);
1270
1271 with POW being one of pow, powf, powi, powif and N being
1272 either 2 or 0.5.
1273
1274 Input:
1275
1276 * STMT_VINFO: The stmt from which the pattern search begins.
1277
1278 Output:
1279
1280 * TYPE_OUT: The type of the output of this pattern.
1281
1282 * Return value: A new stmt that will be used to replace the sequence of
1283 stmts that constitute the pattern. In this case it will be:
1284 x = x * x
1285 or
1286 x = sqrt (x)
1287 */
1288
1289 static gimple *
1290 vect_recog_pow_pattern (vec_info *vinfo,
1291 stmt_vec_info stmt_vinfo, tree *type_out)
1292 {
1293 gimple *last_stmt = stmt_vinfo->stmt;
1294 tree base, exp;
1295 gimple *stmt;
1296 tree var;
1297
1298 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1299 return NULL;
1300
1301 switch (gimple_call_combined_fn (last_stmt))
1302 {
1303 CASE_CFN_POW:
1304 CASE_CFN_POWI:
1305 break;
1306
1307 default:
1308 return NULL;
1309 }
1310
1311 base = gimple_call_arg (last_stmt, 0);
1312 exp = gimple_call_arg (last_stmt, 1);
1313 if (TREE_CODE (exp) != REAL_CST
1314 && TREE_CODE (exp) != INTEGER_CST)
1315 {
1316 if (flag_unsafe_math_optimizations
1317 && TREE_CODE (base) == REAL_CST
1318 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
1319 {
1320 combined_fn log_cfn;
1321 built_in_function exp_bfn;
1322 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1323 {
1324 case BUILT_IN_POW:
1325 log_cfn = CFN_BUILT_IN_LOG;
1326 exp_bfn = BUILT_IN_EXP;
1327 break;
1328 case BUILT_IN_POWF:
1329 log_cfn = CFN_BUILT_IN_LOGF;
1330 exp_bfn = BUILT_IN_EXPF;
1331 break;
1332 case BUILT_IN_POWL:
1333 log_cfn = CFN_BUILT_IN_LOGL;
1334 exp_bfn = BUILT_IN_EXPL;
1335 break;
1336 default:
1337 return NULL;
1338 }
1339 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1340 tree exp_decl = builtin_decl_implicit (exp_bfn);
1341 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1342 does that, but if C is a power of 2 it prefers
1343 exp2 (log2 (C) * x) for the non-vectorized version, whereas for
1344 vectorization we don't have a vectorized exp2. */
1345 if (logc
1346 && TREE_CODE (logc) == REAL_CST
1347 && exp_decl
1348 && lookup_attribute ("omp declare simd",
1349 DECL_ATTRIBUTES (exp_decl)))
1350 {
1351 cgraph_node *node = cgraph_node::get_create (exp_decl);
1352 if (node->simd_clones == NULL)
1353 {
1354 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1355 || node->definition)
1356 return NULL;
1357 expand_simd_clones (node);
1358 if (node->simd_clones == NULL)
1359 return NULL;
1360 }
1361 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1362 if (!*type_out)
1363 return NULL;
1364 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1365 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1366 append_pattern_def_seq (vinfo, stmt_vinfo, g);
1367 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1368 g = gimple_build_call (exp_decl, 1, def);
1369 gimple_call_set_lhs (g, res);
1370 return g;
1371 }
1372 }
1373
1374 return NULL;
1375 }
1376
1377 /* We now have a pow or powi builtin function call with a constant
1378 exponent. */
1379
1380 /* Catch squaring. */
1381 if ((tree_fits_shwi_p (exp)
1382 && tree_to_shwi (exp) == 2)
1383 || (TREE_CODE (exp) == REAL_CST
1384 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1385 {
1386 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
1387 TREE_TYPE (base), type_out))
1388 return NULL;
1389
1390 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1391 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1392 return stmt;
1393 }
1394
1395 /* Catch square root. */
1396 if (TREE_CODE (exp) == REAL_CST
1397 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1398 {
1399 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1400 if (*type_out
1401 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1402 OPTIMIZE_FOR_SPEED))
1403 {
1404 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1405 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1406 gimple_call_set_lhs (stmt, var);
1407 gimple_call_set_nothrow (stmt, true);
1408 return stmt;
1409 }
1410 }
1411
1412 return NULL;
1413 }
1414
1415
1416 /* Function vect_recog_widen_sum_pattern
1417
1418 Try to find the following pattern:
1419
1420 type x_t;
1421 TYPE x_T, sum = init;
1422 loop:
1423 sum_0 = phi <init, sum_1>
1424 S1 x_t = *p;
1425 S2 x_T = (TYPE) x_t;
1426 S3 sum_1 = x_T + sum_0;
1427
1428 where type 'TYPE' is at least double the size of type 'type', i.e. we're
1429 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1430 a special case of a reduction computation.
1431
1432 Input:
1433
1434 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1435 when this function is called with S3, the pattern {S2,S3} will be detected.
1436
1437 Output:
1438
1439 * TYPE_OUT: The type of the output of this pattern.
1440
1441 * Return value: A new stmt that will be used to replace the sequence of
1442 stmts that constitute the pattern. In this case it will be:
1443 WIDEN_SUM <x_t, sum_0>
1444
1445 Note: The widening-sum idiom is a widening reduction pattern that is
1446 vectorized without preserving all the intermediate results. It
1447 produces only N/2 (widened) results (by summing up pairs of
1448 intermediate results) rather than all N results. Therefore, we
1449 cannot allow this pattern when we want to get all the results and in
1450 the correct order (as is the case when this computation is in an
1451 inner-loop nested in an outer-loop that is being vectorized). */
1452
1453 static gimple *
1454 vect_recog_widen_sum_pattern (vec_info *vinfo,
1455 stmt_vec_info stmt_vinfo, tree *type_out)
1456 {
1457 gimple *last_stmt = stmt_vinfo->stmt;
1458 tree oprnd0, oprnd1;
1459 tree type;
1460 gimple *pattern_stmt;
1461 tree var;
1462
1463 /* Look for the following pattern
1464 DX = (TYPE) X;
1465 sum_1 = DX + sum_0;
1466 In which DX is at least double the size of X, and sum_1 has been
1467 recognized as a reduction variable.
1468 */
1469
1470 /* Starting from LAST_STMT, follow the defs of its uses in search
1471 of the above pattern. */
1472
1473 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1474 &oprnd0, &oprnd1))
1475 return NULL;
1476
1477 type = gimple_expr_type (last_stmt);
1478
1479 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1480 we know that oprnd1 is the reduction variable (defined by a loop-header
1481 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1482 It remains to check that oprnd0 is defined by a cast from type 'type' to type
1483 'TYPE'. */
1484
1485 vect_unpromoted_value unprom0;
1486 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1487 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1488 return NULL;
1489
1490 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1491
1492 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
1493 unprom0.type, type_out))
1494 return NULL;
1495
1496 var = vect_recog_temp_ssa_var (type, NULL);
1497 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1498
1499 return pattern_stmt;
1500 }
1501
1502 /* Recognize cases in which an operation is performed in one type WTYPE
1503 but could be done more efficiently in a narrower type NTYPE. For example,
1504 if we have:
1505
1506 ATYPE a; // narrower than NTYPE
1507 BTYPE b; // narrower than NTYPE
1508 WTYPE aw = (WTYPE) a;
1509 WTYPE bw = (WTYPE) b;
1510 WTYPE res = aw + bw; // only uses of aw and bw
1511
1512 then it would be more efficient to do:
1513
1514 NTYPE an = (NTYPE) a;
1515 NTYPE bn = (NTYPE) b;
1516 NTYPE resn = an + bn;
1517 WTYPE res = (WTYPE) resn;
1518
1519 Other situations include things like:
1520
1521 ATYPE a; // NTYPE or narrower
1522 WTYPE aw = (WTYPE) a;
1523 WTYPE res = aw + b;
1524
1525 when only "(NTYPE) res" is significant. In that case it's more efficient
1526 to truncate "b" and do the operation on NTYPE instead:
1527
1528 NTYPE an = (NTYPE) a;
1529 NTYPE bn = (NTYPE) b; // truncation
1530 NTYPE resn = an + bn;
1531 WTYPE res = (WTYPE) resn;
1532
1533 All users of "res" should then use "resn" instead, making the final
1534 statement dead (not marked as relevant). The final statement is still
1535 needed to maintain the type correctness of the IR.
1536
1537 vect_determine_precisions has already determined the minimum
1538 precision of the operation and the minimum precision required
1539 by users of the result. */
1540
1541 static gimple *
1542 vect_recog_over_widening_pattern (vec_info *vinfo,
1543 stmt_vec_info last_stmt_info, tree *type_out)
1544 {
1545 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1546 if (!last_stmt)
1547 return NULL;
1548
1549 /* See whether we have found that this operation can be done on a
1550 narrower type without changing its semantics. */
1551 unsigned int new_precision = last_stmt_info->operation_precision;
1552 if (!new_precision)
1553 return NULL;
1554
1555 tree lhs = gimple_assign_lhs (last_stmt);
1556 tree type = TREE_TYPE (lhs);
1557 tree_code code = gimple_assign_rhs_code (last_stmt);
1558
1559 /* Keep the first operand of a COND_EXPR as-is: only the other two
1560 operands are interesting. */
1561 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1562
1563 /* Check the operands. */
1564 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1565 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1566 unprom.quick_grow (nops);
1567 unsigned int min_precision = 0;
1568 bool single_use_p = false;
1569 for (unsigned int i = 0; i < nops; ++i)
1570 {
1571 tree op = gimple_op (last_stmt, first_op + i);
1572 if (TREE_CODE (op) == INTEGER_CST)
1573 unprom[i].set_op (op, vect_constant_def);
1574 else if (TREE_CODE (op) == SSA_NAME)
1575 {
1576 bool op_single_use_p = true;
1577 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1578 &op_single_use_p))
1579 return NULL;
1580 /* If:
1581
1582 (1) N bits of the result are needed;
1583 (2) all inputs are widened from M<N bits; and
1584 (3) one operand OP is a single-use SSA name
1585
1586 we can shift the M->N widening from OP to the output
1587 without changing the number or type of extensions involved.
1588 This then reduces the number of copies of STMT_INFO.
1589
1590 If instead of (3) more than one operand is a single-use SSA name,
1591 shifting the extension to the output is even more of a win.
1592
1593 If instead:
1594
1595 (1) N bits of the result are needed;
1596 (2) one operand OP2 is widened from M2<N bits;
1597 (3) another operand OP1 is widened from M1<M2 bits; and
1598 (4) both OP1 and OP2 are single-use
1599
1600 the choice is between:
1601
1602 (a) truncating OP2 to M1, doing the operation on M1,
1603 and then widening the result to N
1604
1605 (b) widening OP1 to M2, doing the operation on M2, and then
1606 widening the result to N
1607
1608 Both shift the M2->N widening of the inputs to the output.
1609 (a) additionally shifts the M1->M2 widening to the output;
1610 it requires fewer copies of STMT_INFO but requires an extra
1611 M2->M1 truncation.
1612
1613 Which is better will depend on the complexity and cost of
1614 STMT_INFO, which is hard to predict at this stage. However,
1615 a clear tie-breaker in favor of (b) is the fact that the
1616 truncation in (a) increases the length of the operation chain.
1617
1618 If instead of (4) only one of OP1 or OP2 is single-use,
1619 (b) is still a win over doing the operation in N bits:
1620 it still shifts the M2->N widening on the single-use operand
1621 to the output and reduces the number of STMT_INFO copies.
1622
1623 If neither operand is single-use then operating on fewer than
1624 N bits might lead to more extensions overall. Whether it does
1625 or not depends on global information about the vectorization
1626 region, and whether that's a good trade-off would again
1627 depend on the complexity and cost of the statements involved,
1628 as well as things like register pressure that are not normally
1629 modelled at this stage. We therefore ignore these cases
1630 and just optimize the clear single-use wins above.
1631
1632 Thus we take the maximum precision of the unpromoted operands
1633 and record whether any operand is single-use. */
1634 if (unprom[i].dt == vect_internal_def)
1635 {
1636 min_precision = MAX (min_precision,
1637 TYPE_PRECISION (unprom[i].type));
1638 single_use_p |= op_single_use_p;
1639 }
1640 }
1641 }
1642
1643 /* Although the operation could be done in operation_precision, we have
1644 to balance that against introducing extra truncations or extensions.
1645 Calculate the minimum precision that can be handled efficiently.
1646
1647 The loop above determined that the operation could be handled
1648 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1649 extension from the inputs to the output without introducing more
1650 instructions, and would reduce the number of instructions required
1651 for STMT_INFO itself.
1652
1653 vect_determine_precisions has also determined that the result only
1654 	 needs min_output_precision bits.  Truncating by a factor of N
1655 	 requires a tree of N - 1 instructions, so if TYPE is N times wider
1656 than min_output_precision, doing the operation in TYPE and truncating
1657 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1658 In contrast:
1659
1660 - truncating the input to a unary operation and doing the operation
1661 in the new type requires at most N - 1 + 1 = N instructions per
1662 output vector
1663
1664 - doing the same for a binary operation requires at most
1665 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1666
1667 Both unary and binary operations require fewer instructions than
1668 this if the operands were extended from a suitable truncated form.
1669 Thus there is usually nothing to lose by doing operations in
1670 min_output_precision bits, but there can be something to gain. */
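     /* Worked example (illustrative numbers only, not from the original
	comment): if TYPE is 32 bits and min_output_precision is 8, then
	N = 4: doing the operation in TYPE costs 4 + 3 = 7 instructions
	per output vector, while a binary operation on truncated 8-bit
	inputs costs at most 3 * 2 + 1 = 7, and usually fewer when the
	inputs were already extended from 8 bits.  */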
1671 if (!single_use_p)
1672 min_precision = last_stmt_info->min_output_precision;
1673 else
1674 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1675
1676 /* Apply the minimum efficient precision we just calculated. */
1677 if (new_precision < min_precision)
1678 new_precision = min_precision;
1679 if (new_precision >= TYPE_PRECISION (type))
1680 return NULL;
1681
1682 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1683
1684 *type_out = get_vectype_for_scalar_type (vinfo, type);
1685 if (!*type_out)
1686 return NULL;
1687
1688 /* We've found a viable pattern. Get the new type of the operation. */
1689 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1690 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1691
1692 /* If we're truncating an operation, we need to make sure that we
1693 don't introduce new undefined overflow. The codes tested here are
1694 a subset of those accepted by vect_truncatable_operation_p. */
1695 tree op_type = new_type;
1696 if (TYPE_OVERFLOW_UNDEFINED (new_type)
1697 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
1698 op_type = build_nonstandard_integer_type (new_precision, true);
1699
1700 /* We specifically don't check here whether the target supports the
1701 new operation, since it might be something that a later pattern
1702 wants to rewrite anyway. If targets have a minimum element size
1703 for some optabs, we should pattern-match smaller ops to larger ops
1704 where beneficial. */
1705 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1706 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
1707 if (!new_vectype || !op_vectype)
1708 return NULL;
1709
1710 if (dump_enabled_p ())
1711 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1712 type, new_type);
1713
1714 /* Calculate the rhs operands for an operation on OP_TYPE. */
1715 tree ops[3] = {};
1716 for (unsigned int i = 1; i < first_op; ++i)
1717 ops[i - 1] = gimple_op (last_stmt, i);
1718 vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
1719 op_type, &unprom[0], op_vectype);
1720
1721 /* Use the operation to produce a result of type OP_TYPE. */
1722 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
1723 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1724 ops[0], ops[1], ops[2]);
1725 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1726
1727 if (dump_enabled_p ())
1728 dump_printf_loc (MSG_NOTE, vect_location,
1729 "created pattern stmt: %G", pattern_stmt);
1730
1731 /* Convert back to the original signedness, if OP_TYPE is different
1732 from NEW_TYPE. */
1733 if (op_type != new_type)
1734 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
1735 pattern_stmt, op_vectype);
1736
1737 /* Promote the result to the original type. */
1738 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
1739 pattern_stmt, new_vectype);
1740
1741 return pattern_stmt;
1742 }
1743
1744 /* Recognize the following patterns:
1745
1746 ATYPE a; // narrower than TYPE
1747 BTYPE b; // narrower than TYPE
1748
1749 1) Multiply high with scaling
1750 TYPE res = ((TYPE) a * (TYPE) b) >> c;
1751 2) ... or also with rounding
1752      TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
1753
1754 where only the bottom half of res is used. */
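   /* Illustrative instance (types chosen for this example only): with
      ATYPE = BTYPE = int16_t and TYPE = int32_t,

	int32_t res = ((int32_t) a * (int32_t) b) >> 15;

      matches form 1 and can become a 16-bit .MULHS (a, b), while
      ((((int32_t) a * (int32_t) b) >> 14) + 1) >> 1 matches form 2
      and maps to .MULHRS.  */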
1755
1756 static gimple *
1757 vect_recog_mulhs_pattern (vec_info *vinfo,
1758 stmt_vec_info last_stmt_info, tree *type_out)
1759 {
1760 /* Check for a right shift. */
1761 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1762 if (!last_stmt
1763 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
1764 return NULL;
1765
1766 /* Check that the shift result is wider than the users of the
1767 result need (i.e. that narrowing would be a natural choice). */
1768 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1769 unsigned int target_precision
1770 = vect_element_precision (last_stmt_info->min_output_precision);
1771 if (!INTEGRAL_TYPE_P (lhs_type)
1772 || target_precision >= TYPE_PRECISION (lhs_type))
1773 return NULL;
1774
1775 /* Look through any change in sign on the outer shift input. */
1776 vect_unpromoted_value unprom_rshift_input;
1777 tree rshift_input = vect_look_through_possible_promotion
1778 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
1779 if (!rshift_input
1780 || TYPE_PRECISION (TREE_TYPE (rshift_input))
1781 != TYPE_PRECISION (lhs_type))
1782 return NULL;
1783
1784 /* Get the definition of the shift input. */
1785 stmt_vec_info rshift_input_stmt_info
1786 = vect_get_internal_def (vinfo, rshift_input);
1787 if (!rshift_input_stmt_info)
1788 return NULL;
1789 gassign *rshift_input_stmt
1790 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
1791 if (!rshift_input_stmt)
1792 return NULL;
1793
1794 stmt_vec_info mulh_stmt_info;
1795 tree scale_term;
1796 internal_fn ifn;
1797 unsigned int expect_offset;
1798
1799 /* Check for the presence of the rounding term. */
1800 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
1801 {
1802 /* Check that the outer shift was by 1. */
1803 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
1804 return NULL;
1805
1806 /* Check that the second operand of the PLUS_EXPR is 1. */
1807 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
1808 return NULL;
1809
1810 /* Look through any change in sign on the addition input. */
1811 vect_unpromoted_value unprom_plus_input;
1812 tree plus_input = vect_look_through_possible_promotion
1813 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
1814 if (!plus_input
1815 || TYPE_PRECISION (TREE_TYPE (plus_input))
1816 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
1817 return NULL;
1818
1819 /* Get the definition of the multiply-high-scale part. */
1820 stmt_vec_info plus_input_stmt_info
1821 = vect_get_internal_def (vinfo, plus_input);
1822 if (!plus_input_stmt_info)
1823 return NULL;
1824 gassign *plus_input_stmt
1825 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
1826 if (!plus_input_stmt
1827 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
1828 return NULL;
1829
1830 /* Look through any change in sign on the scaling input. */
1831 vect_unpromoted_value unprom_scale_input;
1832 tree scale_input = vect_look_through_possible_promotion
1833 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
1834 if (!scale_input
1835 || TYPE_PRECISION (TREE_TYPE (scale_input))
1836 != TYPE_PRECISION (TREE_TYPE (plus_input)))
1837 return NULL;
1838
1839 /* Get the definition of the multiply-high part. */
1840 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
1841 if (!mulh_stmt_info)
1842 return NULL;
1843
1844 /* Get the scaling term. */
1845 scale_term = gimple_assign_rhs2 (plus_input_stmt);
1846
1847 expect_offset = target_precision + 2;
1848 ifn = IFN_MULHRS;
1849 }
1850 else
1851 {
1852 mulh_stmt_info = rshift_input_stmt_info;
1853 scale_term = gimple_assign_rhs2 (last_stmt);
1854
1855 expect_offset = target_precision + 1;
1856 ifn = IFN_MULHS;
1857 }
1858
1859 /* Check that the scaling factor is correct. */
1860 if (TREE_CODE (scale_term) != INTEGER_CST
1861 || wi::to_widest (scale_term) + expect_offset
1862 != TYPE_PRECISION (lhs_type))
1863 return NULL;
1864
1865 /* Check whether the scaling input term can be seen as two widened
1866 inputs multiplied together. */
1867 vect_unpromoted_value unprom_mult[2];
1868 tree new_type;
1869 unsigned int nops
1870 = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
1871 false, 2, unprom_mult, &new_type);
1872 if (nops != 2)
1873 return NULL;
1874
1875 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
1876
1877 /* Adjust output precision. */
1878 if (TYPE_PRECISION (new_type) < target_precision)
1879 new_type = build_nonstandard_integer_type
1880 (target_precision, TYPE_UNSIGNED (new_type));
1881
1882 /* Check for target support. */
1883 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1884 if (!new_vectype
1885 || !direct_internal_fn_supported_p
1886 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
1887 return NULL;
1888
1889 /* The IR requires a valid vector type for the cast result, even though
1890 it's likely to be discarded. */
1891 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
1892 if (!*type_out)
1893 return NULL;
1894
1895 /* Generate the IFN_MULHRS call. */
1896 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1897 tree new_ops[2];
1898 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
1899 unprom_mult, new_vectype);
1900 gcall *mulhrs_stmt
1901 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
1902 gimple_call_set_lhs (mulhrs_stmt, new_var);
1903 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
1904
1905 if (dump_enabled_p ())
1906 dump_printf_loc (MSG_NOTE, vect_location,
1907 "created pattern stmt: %G", mulhrs_stmt);
1908
1909 return vect_convert_output (vinfo, last_stmt_info, lhs_type,
1910 mulhrs_stmt, new_vectype);
1911 }
1912
1913 /* Recognize the patterns:
1914
1915 ATYPE a; // narrower than TYPE
1916 BTYPE b; // narrower than TYPE
1917 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1918 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1919
1920 where only the bottom half of avg is used. Try to transform them into:
1921
1922 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1923 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1924
1925 followed by:
1926
1927 TYPE avg = (TYPE) avg';
1928
1929 where NTYPE is no wider than half of TYPE. Since only the bottom half
1930 of avg is used, all or part of the cast of avg' should become redundant.
1931
1932 If there is no target support available, generate code to distribute rshift
1933 over plus and add a carry. */
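   /* Illustrative instance (example types only): with ATYPE = BTYPE =
      uint8_t and TYPE = int, "(a + b + 1) >> 1" whose result is only
      used as a uint8_t becomes ".AVG_CEIL (a, b)" on 8-bit elements,
      and "(a + b) >> 1" becomes ".AVG_FLOOR (a, b)".  */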
1934
1935 static gimple *
1936 vect_recog_average_pattern (vec_info *vinfo,
1937 stmt_vec_info last_stmt_info, tree *type_out)
1938 {
1939 /* Check for a shift right by one bit. */
1940 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1941 if (!last_stmt
1942 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1943 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1944 return NULL;
1945
1946 /* Check that the shift result is wider than the users of the
1947 result need (i.e. that narrowing would be a natural choice). */
1948 tree lhs = gimple_assign_lhs (last_stmt);
1949 tree type = TREE_TYPE (lhs);
1950 unsigned int target_precision
1951 = vect_element_precision (last_stmt_info->min_output_precision);
1952 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1953 return NULL;
1954
1955 /* Look through any change in sign on the shift input. */
1956 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1957 vect_unpromoted_value unprom_plus;
1958 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
1959 &unprom_plus);
1960 if (!rshift_rhs
1961 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
1962 return NULL;
1963
1964 /* Get the definition of the shift input. */
1965 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1966 if (!plus_stmt_info)
1967 return NULL;
1968
1969 /* Check whether the shift input can be seen as a tree of additions on
1970 2 or 3 widened inputs.
1971
1972 Note that the pattern should be a win even if the result of one or
1973 more additions is reused elsewhere: if the pattern matches, we'd be
1974 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1975 internal_fn ifn = IFN_AVG_FLOOR;
1976 vect_unpromoted_value unprom[3];
1977 tree new_type;
1978 unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
1979 PLUS_EXPR, false, 3,
1980 unprom, &new_type);
1981 if (nops == 0)
1982 return NULL;
1983 if (nops == 3)
1984 {
1985 /* Check that one operand is 1. */
1986 unsigned int i;
1987 for (i = 0; i < 3; ++i)
1988 if (integer_onep (unprom[i].op))
1989 break;
1990 if (i == 3)
1991 return NULL;
1992 /* Throw away the 1 operand and keep the other two. */
1993 if (i < 2)
1994 unprom[i] = unprom[2];
1995 ifn = IFN_AVG_CEIL;
1996 }
1997
1998 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
1999
2000 /* We know that:
2001
2002 (a) the operation can be viewed as:
2003
2004 TYPE widened0 = (TYPE) UNPROM[0];
2005 TYPE widened1 = (TYPE) UNPROM[1];
2006 TYPE tmp1 = widened0 + widened1 {+ 1};
2007 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
2008
2009 (b) the first two statements are equivalent to:
2010
2011 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2012 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2013
2014 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2015 where sensible;
2016
2017 (d) all the operations can be performed correctly at twice the width of
2018 NEW_TYPE, due to the nature of the average operation; and
2019
2020 (e) users of the result of the right shift need only TARGET_PRECISION
2021 bits, where TARGET_PRECISION is no more than half of TYPE's
2022 precision.
2023
2024 Under these circumstances, the only situation in which NEW_TYPE
2025 could be narrower than TARGET_PRECISION is if widened0, widened1
2026 and an addition result are all used more than once. Thus we can
2027 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2028 as "free", whereas widening the result of the average instruction
2029 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
2030 therefore better not to go narrower than TARGET_PRECISION. */
2031 if (TYPE_PRECISION (new_type) < target_precision)
2032 new_type = build_nonstandard_integer_type (target_precision,
2033 TYPE_UNSIGNED (new_type));
2034
2035 /* Check for target support. */
2036 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2037 if (!new_vectype)
2038 return NULL;
2039
2040 bool fallback_p = false;
2041
2042 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2043 ;
2044 else if (TYPE_UNSIGNED (new_type)
2045 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
2046 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
2047 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
2048 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
2049 fallback_p = true;
2050 else
2051 return NULL;
2052
2053 /* The IR requires a valid vector type for the cast result, even though
2054 it's likely to be discarded. */
2055 *type_out = get_vectype_for_scalar_type (vinfo, type);
2056 if (!*type_out)
2057 return NULL;
2058
2059 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2060 tree new_ops[2];
2061 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2062 unprom, new_vectype);
2063
2064 if (fallback_p)
2065 {
2066       /* As a fallback, generate code for the following sequence:
2067
2068 shifted_op0 = new_ops[0] >> 1;
2069 shifted_op1 = new_ops[1] >> 1;
2070 sum_of_shifted = shifted_op0 + shifted_op1;
2071 unmasked_carry = new_ops[0] and/or new_ops[1];
2072 carry = unmasked_carry & 1;
2073 new_var = sum_of_shifted + carry;
2074 */
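      /* Note (explanatory addition): for .AVG_FLOOR the carry is
	 (a & b) & 1 -- the bit dropped by the two right shifts only
	 matters when it is set in both operands -- while .AVG_CEIL uses
	 (a | b) & 1, which folds the "+ 1" rounding into the carry.  */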
2075
2076 tree one_cst = build_one_cst (new_type);
2077 gassign *g;
2078
2079 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
2080 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
2081 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2082
2083 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
2084 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
2085 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2086
2087 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
2088 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
2089 shifted_op0, shifted_op1);
2090 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2091
2092 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
2093 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
2094 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
2095 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2096
2097 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
2098 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
2099 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2100
2101 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
2102 return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
2103 }
2104
2105 /* Generate the IFN_AVG* call. */
2106 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2107 new_ops[1]);
2108 gimple_call_set_lhs (average_stmt, new_var);
2109 gimple_set_location (average_stmt, gimple_location (last_stmt));
2110
2111 if (dump_enabled_p ())
2112 dump_printf_loc (MSG_NOTE, vect_location,
2113 "created pattern stmt: %G", average_stmt);
2114
2115 return vect_convert_output (vinfo, last_stmt_info,
2116 type, average_stmt, new_vectype);
2117 }
2118
2119 /* Recognize cases in which the input to a cast is wider than its
2120 output, and the input is fed by a widening operation. Fold this
2121 by removing the unnecessary intermediate widening. E.g.:
2122
2123 unsigned char a;
2124 unsigned int b = (unsigned int) a;
2125 unsigned short c = (unsigned short) b;
2126
2127 -->
2128
2129 unsigned short c = (unsigned short) a;
2130
2131 Although this is rare in input IR, it is an expected side-effect
2132 of the over-widening pattern above.
2133
2134    This is also beneficial for integer-to-float conversions, if the
2135    widened integer has more bits than the float, and if the unwidened
2136    input doesn't.  */
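   /* For example (an assumed case, for illustration): with
      "unsigned char a; unsigned long long b = a; float c = (float) b;"
      the 64-bit intermediate is wider than float's 32-bit mode while
      the 8-bit input is not, so the conversion can be applied to A
      directly.  */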
2137
2138 static gimple *
2139 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
2140 stmt_vec_info last_stmt_info, tree *type_out)
2141 {
2142 /* Check for a cast, including an integer-to-float conversion. */
2143 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2144 if (!last_stmt)
2145 return NULL;
2146 tree_code code = gimple_assign_rhs_code (last_stmt);
2147 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2148 return NULL;
2149
2150 /* Make sure that the rhs is a scalar with a natural bitsize. */
2151 tree lhs = gimple_assign_lhs (last_stmt);
2152 if (!lhs)
2153 return NULL;
2154 tree lhs_type = TREE_TYPE (lhs);
2155 scalar_mode lhs_mode;
2156 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2157 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2158 return NULL;
2159
2160 /* Check for a narrowing operation (from a vector point of view). */
2161 tree rhs = gimple_assign_rhs1 (last_stmt);
2162 tree rhs_type = TREE_TYPE (rhs);
2163 if (!INTEGRAL_TYPE_P (rhs_type)
2164 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2165 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2166 return NULL;
2167
2168 /* Try to find an unpromoted input. */
2169 vect_unpromoted_value unprom;
2170 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2171 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2172 return NULL;
2173
2174 /* If the bits above RHS_TYPE matter, make sure that they're the
2175 same when extending from UNPROM as they are when extending from RHS. */
2176 if (!INTEGRAL_TYPE_P (lhs_type)
2177 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2178 return NULL;
2179
2180 /* We can get the same result by casting UNPROM directly, to avoid
2181 the unnecessary widening and narrowing. */
2182 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2183
2184 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2185 if (!*type_out)
2186 return NULL;
2187
2188 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2189 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2190 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2191
2192 return pattern_stmt;
2193 }
2194
2195 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2196 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
2197
2198 static gimple *
2199 vect_recog_widen_shift_pattern (vec_info *vinfo,
2200 stmt_vec_info last_stmt_info, tree *type_out)
2201 {
2202 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
2203 LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
2204 "vect_recog_widen_shift_pattern");
2205 }
2206
2207 /* Detect a rotate pattern that wouldn't be otherwise vectorized:
2208
2209 type a_t, b_t, c_t;
2210
2211 S0 a_t = b_t r<< c_t;
2212
2213 Input/Output:
2214
2215 * STMT_VINFO: The stmt from which the pattern search begins,
2216 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
2217 with a sequence:
2218
2219 S1 d_t = -c_t;
2220 S2 e_t = d_t & (B - 1);
2221 S3 f_t = b_t << c_t;
2222 S4 g_t = b_t >> e_t;
2223 S0 a_t = f_t | g_t;
2224
2225 where B is element bitsize of type.
2226
2227 Output:
2228
2229 * TYPE_OUT: The type of the output of this pattern.
2230
2231 * Return value: A new stmt that will be used to replace the rotate
2232 S0 stmt. */
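   /* Concretely (an illustrative case): for a 32-bit unsigned b_t
      rotated left by c_t, the replacement computes
      (b_t << c_t) | (b_t >> ((-c_t) & 31)); the mask keeps the
      right-shift amount in [0, 31], so c_t == 0 still yields b_t
      rather than an out-of-range shift by 32.  */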
2233
2234 static gimple *
2235 vect_recog_rotate_pattern (vec_info *vinfo,
2236 stmt_vec_info stmt_vinfo, tree *type_out)
2237 {
2238 gimple *last_stmt = stmt_vinfo->stmt;
2239 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2240 gimple *pattern_stmt, *def_stmt;
2241 enum tree_code rhs_code;
2242 enum vect_def_type dt;
2243 optab optab1, optab2;
2244 edge ext_def = NULL;
2245 bool bswap16_p = false;
2246
2247 if (is_gimple_assign (last_stmt))
2248 {
2249 rhs_code = gimple_assign_rhs_code (last_stmt);
2250 switch (rhs_code)
2251 {
2252 case LROTATE_EXPR:
2253 case RROTATE_EXPR:
2254 break;
2255 default:
2256 return NULL;
2257 }
2258
2259 lhs = gimple_assign_lhs (last_stmt);
2260 oprnd0 = gimple_assign_rhs1 (last_stmt);
2261 type = TREE_TYPE (oprnd0);
2262 oprnd1 = gimple_assign_rhs2 (last_stmt);
2263 }
2264 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2265 {
2266 /* __builtin_bswap16 (x) is another form of x r>> 8.
2267 The vectorizer has bswap support, but only if the argument isn't
2268 promoted. */
2269 lhs = gimple_call_lhs (last_stmt);
2270 oprnd0 = gimple_call_arg (last_stmt, 0);
2271 type = TREE_TYPE (oprnd0);
2272 if (!lhs
2273 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2274 || TYPE_PRECISION (type) <= 16
2275 || TREE_CODE (oprnd0) != SSA_NAME
2276 || BITS_PER_UNIT != 8
2277 || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
2278 return NULL;
2279
2280 stmt_vec_info def_stmt_info;
2281 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2282 return NULL;
2283
2284 if (dt != vect_internal_def)
2285 return NULL;
2286
2287 if (gimple_assign_cast_p (def_stmt))
2288 {
2289 def = gimple_assign_rhs1 (def_stmt);
2290 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2291 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2292 oprnd0 = def;
2293 }
2294
2295 type = TREE_TYPE (lhs);
2296 vectype = get_vectype_for_scalar_type (vinfo, type);
2297 if (vectype == NULL_TREE)
2298 return NULL;
2299
2300 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2301 {
2302 /* The encoding uses one stepped pattern for each byte in the
2303 16-bit word. */
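	      /* For illustration: the series pushed below is
		 { 1, 0, 3, 2, 5, 4, ... }, i.e. a byte swap within each
		 16-bit lane.  */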
2304 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2305 for (unsigned i = 0; i < 3; ++i)
2306 for (unsigned j = 0; j < 2; ++j)
2307 elts.quick_push ((i + 1) * 2 - j - 1);
2308
2309 vec_perm_indices indices (elts, 1,
2310 TYPE_VECTOR_SUBPARTS (char_vectype));
2311 if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2312 {
2313 /* vectorizable_bswap can handle the __builtin_bswap16 if we
2314 undo the argument promotion. */
2315 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2316 {
2317 def = vect_recog_temp_ssa_var (type, NULL);
2318 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2319 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2320 oprnd0 = def;
2321 }
2322
2323 /* Pattern detected. */
2324 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2325
2326 *type_out = vectype;
2327
2328 /* Pattern supported. Create a stmt to be used to replace the
2329 pattern, with the unpromoted argument. */
2330 var = vect_recog_temp_ssa_var (type, NULL);
2331 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
2332 1, oprnd0);
2333 gimple_call_set_lhs (pattern_stmt, var);
2334 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
2335 gimple_call_fntype (last_stmt));
2336 return pattern_stmt;
2337 }
2338 }
2339
2340 oprnd1 = build_int_cst (integer_type_node, 8);
2341 rhs_code = LROTATE_EXPR;
2342 bswap16_p = true;
2343 }
2344 else
2345 return NULL;
2346
2347 if (TREE_CODE (oprnd0) != SSA_NAME
2348 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2349 || !INTEGRAL_TYPE_P (type)
2350 || !TYPE_UNSIGNED (type))
2351 return NULL;
2352
2353 stmt_vec_info def_stmt_info;
2354 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2355 return NULL;
2356
2357 if (dt != vect_internal_def
2358 && dt != vect_constant_def
2359 && dt != vect_external_def)
2360 return NULL;
2361
2362 vectype = get_vectype_for_scalar_type (vinfo, type);
2363 if (vectype == NULL_TREE)
2364 return NULL;
2365
2366 /* If vector/vector or vector/scalar rotate is supported by the target,
2367 don't do anything here. */
2368 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2369 if (optab1
2370 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2371 {
2372 use_rotate:
2373 if (bswap16_p)
2374 {
2375 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2376 {
2377 def = vect_recog_temp_ssa_var (type, NULL);
2378 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2379 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2380 oprnd0 = def;
2381 }
2382
2383 /* Pattern detected. */
2384 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2385
2386 *type_out = vectype;
2387
2388 /* Pattern supported. Create a stmt to be used to replace the
2389 pattern. */
2390 var = vect_recog_temp_ssa_var (type, NULL);
2391 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
2392 oprnd1);
2393 return pattern_stmt;
2394 }
2395 return NULL;
2396 }
2397
2398 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2399 {
2400 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2401 if (optab2
2402 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2403 goto use_rotate;
2404 }
2405
2406 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2407 don't do anything here either. */
2408 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2409 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2410 if (!optab1
2411 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2412 || !optab2
2413 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2414 {
2415 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2416 return NULL;
2417 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2418 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2419 if (!optab1
2420 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2421 || !optab2
2422 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2423 return NULL;
2424 }
2425
2426 *type_out = vectype;
2427
2428 if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2429 {
2430 def = vect_recog_temp_ssa_var (type, NULL);
2431 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2432 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2433 oprnd0 = def;
2434 }
2435
2436 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
2437 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2438
2439 def = NULL_TREE;
2440 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2441 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2442 def = oprnd1;
2443 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2444 {
2445 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2446 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2447 && TYPE_PRECISION (TREE_TYPE (rhs1))
2448 == TYPE_PRECISION (type))
2449 def = rhs1;
2450 }
2451
2452 if (def == NULL_TREE)
2453 {
2454 def = vect_recog_temp_ssa_var (type, NULL);
2455 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2456 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2457 }
2458 stype = TREE_TYPE (def);
2459 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
2460
2461 if (TREE_CODE (def) == INTEGER_CST)
2462 {
2463 if (!tree_fits_uhwi_p (def)
2464 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2465 || integer_zerop (def))
2466 return NULL;
2467 def2 = build_int_cst (stype,
2468 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2469 }
2470 else
2471 {
2472 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
2473
2474 if (vecstype == NULL_TREE)
2475 return NULL;
2476 def2 = vect_recog_temp_ssa_var (stype, NULL);
2477 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2478 if (ext_def)
2479 {
2480 basic_block new_bb
2481 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2482 gcc_assert (!new_bb);
2483 }
2484 else
2485 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2486
2487 def2 = vect_recog_temp_ssa_var (stype, NULL);
2488 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
2489 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2490 gimple_assign_lhs (def_stmt), mask);
2491 if (ext_def)
2492 {
2493 basic_block new_bb
2494 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2495 gcc_assert (!new_bb);
2496 }
2497 else
2498 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2499 }
2500
2501 var1 = vect_recog_temp_ssa_var (type, NULL);
2502 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2503 ? LSHIFT_EXPR : RSHIFT_EXPR,
2504 oprnd0, def);
2505 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2506
2507 var2 = vect_recog_temp_ssa_var (type, NULL);
2508 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2509 ? RSHIFT_EXPR : LSHIFT_EXPR,
2510 oprnd0, def2);
2511 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2512
2513 /* Pattern detected. */
2514 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2515
2516 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2517 var = vect_recog_temp_ssa_var (type, NULL);
2518 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2519
2520 return pattern_stmt;
2521 }
2522
2523 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2524 vectorized:
2525
2526 type a_t;
2527 TYPE b_T, res_T;
2528
2529 S1 a_t = ;
2530 S2 b_T = ;
2531 S3 res_T = b_T op a_t;
2532
2533    where 'TYPE' is a type of a different size than 'type',
2534    and op is <<, >> or rotate.
2535
2536 Also detect cases:
2537
2538 type a_t;
2539 TYPE b_T, c_T, res_T;
2540
2541 S0 c_T = ;
2542 S1 a_t = (type) c_T;
2543 S2 b_T = ;
2544 S3 res_T = b_T op a_t;
2545
2546 Input/Output:
2547
2548 * STMT_VINFO: The stmt from which the pattern search begins,
2549 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2550 with a shift/rotate which has same type on both operands, in the
2551 second case just b_T op c_T, in the first case with added cast
2552 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2553
2554 Output:
2555
2556 * TYPE_OUT: The type of the output of this pattern.
2557
2558 * Return value: A new stmt that will be used to replace the shift/rotate
2559 S3 stmt. */
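   /* Illustrative instance (example types only): with
      "unsigned char a_t; unsigned int b_T;", "b_T << a_t" is rewritten
      as "b_T << (unsigned int) a_t", with the widening cast of a_t
      added to STMT_VINFO_PATTERN_DEF_SEQ.  */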
2560
2561 static gimple *
2562 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
2563 stmt_vec_info stmt_vinfo,
2564 tree *type_out)
2565 {
2566 gimple *last_stmt = stmt_vinfo->stmt;
2567 tree oprnd0, oprnd1, lhs, var;
2568 gimple *pattern_stmt;
2569 enum tree_code rhs_code;
2570
2571 if (!is_gimple_assign (last_stmt))
2572 return NULL;
2573
2574 rhs_code = gimple_assign_rhs_code (last_stmt);
2575 switch (rhs_code)
2576 {
2577 case LSHIFT_EXPR:
2578 case RSHIFT_EXPR:
2579 case LROTATE_EXPR:
2580 case RROTATE_EXPR:
2581 break;
2582 default:
2583 return NULL;
2584 }
2585
2586 lhs = gimple_assign_lhs (last_stmt);
2587 oprnd0 = gimple_assign_rhs1 (last_stmt);
2588 oprnd1 = gimple_assign_rhs2 (last_stmt);
2589 if (TREE_CODE (oprnd0) != SSA_NAME
2590 || TREE_CODE (oprnd1) != SSA_NAME
2591 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2592 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2593 || TYPE_PRECISION (TREE_TYPE (lhs))
2594 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2595 return NULL;
2596
2597 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2598 if (!def_vinfo)
2599 return NULL;
2600
2601 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
2602 if (*type_out == NULL_TREE)
2603 return NULL;
2604
2605 tree def = NULL_TREE;
2606 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2607 if (def_stmt && gimple_assign_cast_p (def_stmt))
2608 {
2609 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2610 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2611 && TYPE_PRECISION (TREE_TYPE (rhs1))
2612 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2613 {
2614 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2615 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2616 def = rhs1;
2617 else
2618 {
2619 tree mask
2620 = build_low_bits_mask (TREE_TYPE (rhs1),
2621 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2622 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2623 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2624 tree vecstype = get_vectype_for_scalar_type (vinfo,
2625 TREE_TYPE (rhs1));
2626 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2627 }
2628 }
2629 }
2630
2631 if (def == NULL_TREE)
2632 {
2633 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2634 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2635 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2636 }
2637
2638 /* Pattern detected. */
2639 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2640
2641 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2642 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2643 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2644
2645 return pattern_stmt;
2646 }
2647
2648 /* Return true iff the target has a vector optab implementing the operation
2649 CODE on type VECTYPE. */
2650
2651 static bool
2652 target_has_vecop_for_code (tree_code code, tree vectype)
2653 {
2654 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2655 return voptab
2656 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2657 }
2658
2659 /* Verify that the target has optabs of VECTYPE to perform all the steps
2660 needed by the multiplication-by-immediate synthesis algorithm described by
2661 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2662 present. Return true iff the target supports all the steps. */
2663
2664 static bool
2665 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2666 tree vectype, bool synth_shift_p)
2667 {
2668 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2669 return false;
2670
2671 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2672 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2673
2674 if (var == negate_variant
2675 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2676 return false;
2677
2678 /* If we must synthesize shifts with additions make sure that vector
2679 addition is available. */
2680 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2681 return false;
2682
2683 for (int i = 1; i < alg->ops; i++)
2684 {
2685 switch (alg->op[i])
2686 {
2687 case alg_shift:
2688 break;
2689 case alg_add_t_m2:
2690 case alg_add_t2_m:
2691 case alg_add_factor:
2692 if (!supports_vplus)
2693 return false;
2694 break;
2695 case alg_sub_t_m2:
2696 case alg_sub_t2_m:
2697 case alg_sub_factor:
2698 if (!supports_vminus)
2699 return false;
2700 break;
2701 case alg_unknown:
2702 case alg_m:
2703 case alg_zero:
2704 case alg_impossible:
2705 return false;
2706 default:
2707 gcc_unreachable ();
2708 }
2709 }
2710
2711 return true;
2712 }
2713
2714 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2715    putting the final result in DEST.  Append all statements but the last
2716    to STMT_INFO's pattern definition sequence.  Return the last statement.  */
2717
2718 static gimple *
2719 synth_lshift_by_additions (vec_info *vinfo,
2720 tree dest, tree op, HOST_WIDE_INT amnt,
2721 stmt_vec_info stmt_info)
2722 {
2723 HOST_WIDE_INT i;
2724 tree itype = TREE_TYPE (op);
2725 tree prev_res = op;
2726 gcc_assert (amnt >= 0);
2727 for (i = 0; i < amnt; i++)
2728 {
2729 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2730 : dest;
2731 gimple *stmt
2732 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2733 prev_res = tmp_var;
2734 if (i < amnt - 1)
2735 append_pattern_def_seq (vinfo, stmt_info, stmt);
2736 else
2737 return stmt;
2738 }
2739 gcc_unreachable ();
2740 return NULL;
2741 }
2742
2743 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2744 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2745 the process if necessary. Append the resulting assignment statements
2746 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2747 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2748 left shifts using additions. */
2749
2750 static tree
2751 apply_binop_and_append_stmt (vec_info *vinfo,
2752 tree_code code, tree op1, tree op2,
2753 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2754 {
2755 if (integer_zerop (op2)
2756 && (code == LSHIFT_EXPR
2757 || code == PLUS_EXPR))
2758 {
2759 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2760 return op1;
2761 }
2762
2763 gimple *stmt;
2764 tree itype = TREE_TYPE (op1);
2765 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2766
2767 if (code == LSHIFT_EXPR
2768 && synth_shift_p)
2769 {
2770 stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
2771 TREE_INT_CST_LOW (op2), stmt_vinfo);
2772 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2773 return tmp_var;
2774 }
2775
2776 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2777 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2778 return tmp_var;
2779 }
2780
2781 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2782 and simple arithmetic operations to be vectorized. Record the statements
2783 produced in STMT_VINFO and return the last statement in the sequence or
2784 NULL if it's not possible to synthesize such a multiplication.
2785 This function mirrors the behavior of expand_mult_const in expmed.c but
2786 works on tree-ssa form. */
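   /* For example (values chosen purely for illustration): a multiply
      by 9 can be synthesized as "t = x << 3; res = t + x" and one by 7
      as "t = x << 3; res = t - x"; choose_mult_variant selects the
      particular shift/add/sub recipe.  */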
2787
2788 static gimple *
2789 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
2790 stmt_vec_info stmt_vinfo)
2791 {
2792 tree itype = TREE_TYPE (op);
2793 machine_mode mode = TYPE_MODE (itype);
2794 struct algorithm alg;
2795 mult_variant variant;
2796 if (!tree_fits_shwi_p (val))
2797 return NULL;
2798
2799 /* Multiplication synthesis by shifts, adds and subs can introduce
2800 signed overflow where the original operation didn't. Perform the
2801 operations on an unsigned type and cast back to avoid this.
2802 In the future we may want to relax this for synthesis algorithms
2803 that we can prove do not cause unexpected overflow. */
2804 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2805
2806 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2807
2808 /* Targets that don't support vector shifts but support vector additions
2809 can synthesize shifts that way. */
2810 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
2811
2812 HOST_WIDE_INT hwval = tree_to_shwi (val);
2813 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2814 The vectorizer's benefit analysis will decide whether it's beneficial
2815 to do this. */
2816 bool possible = choose_mult_variant (mode, hwval, &alg,
2817 &variant, MAX_COST);
2818 if (!possible)
2819 return NULL;
2820
2821 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
2822
2823 if (!vectype
2824 || !target_supports_mult_synth_alg (&alg, variant,
2825 vectype, synth_shift_p))
2826 return NULL;
2827
2828 tree accumulator;
2829
2830 /* Clear out the sequence of statements so we can populate it below. */
2831 gimple *stmt = NULL;
2832
2833 if (cast_to_unsigned_p)
2834 {
2835 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2836 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2837 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2838 op = tmp_op;
2839 }
2840
2841 if (alg.op[0] == alg_zero)
2842 accumulator = build_int_cst (multtype, 0);
2843 else
2844 accumulator = op;
2845
2846 bool needs_fixup = (variant == negate_variant)
2847 || (variant == add_variant);
2848
2849 for (int i = 1; i < alg.ops; i++)
2850 {
2851 tree shft_log = build_int_cst (multtype, alg.log[i]);
2852 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2853 tree tmp_var = NULL_TREE;
2854
2855 switch (alg.op[i])
2856 {
2857 case alg_shift:
2858 if (synth_shift_p)
2859 stmt
2860 = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
2861 alg.log[i], stmt_vinfo);
2862 else
2863 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2864 shft_log);
2865 break;
2866 case alg_add_t_m2:
2867 tmp_var
2868 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
2869 stmt_vinfo, synth_shift_p);
2870 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2871 tmp_var);
2872 break;
2873 case alg_sub_t_m2:
2874 tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
2875 shft_log, stmt_vinfo,
2876 synth_shift_p);
2877 /* In some algorithms the first step involves zeroing the
2878 		 accumulator.  If subtracting from such an accumulator,
2879 		 just emit the negation directly.  */
2880 if (integer_zerop (accumulator))
2881 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2882 else
2883 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2884 tmp_var);
2885 break;
2886 case alg_add_t2_m:
2887 tmp_var
2888 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2889 shft_log, stmt_vinfo, synth_shift_p);
2890 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2891 break;
2892 case alg_sub_t2_m:
2893 tmp_var
2894 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2895 shft_log, stmt_vinfo, synth_shift_p);
2896 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2897 break;
2898 case alg_add_factor:
2899 tmp_var
2900 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2901 shft_log, stmt_vinfo, synth_shift_p);
2902 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2903 tmp_var);
2904 break;
2905 case alg_sub_factor:
2906 tmp_var
2907 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2908 shft_log, stmt_vinfo, synth_shift_p);
2909 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2910 accumulator);
2911 break;
2912 default:
2913 gcc_unreachable ();
2914 }
2915 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2916 but rather return it directly. */
2917
2918 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2919 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2920 accumulator = accum_tmp;
2921 }
2922 if (variant == negate_variant)
2923 {
2924 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2925 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2926 accumulator = accum_tmp;
2927 if (cast_to_unsigned_p)
2928 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2929 }
2930 else if (variant == add_variant)
2931 {
2932 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2933 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2934 accumulator = accum_tmp;
2935 if (cast_to_unsigned_p)
2936 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2937 }
2938   /* Move back to a signed type if needed.  */
2939 if (cast_to_unsigned_p)
2940 {
2941 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2942 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2943 }
2944
2945 return stmt;
2946 }
2947
2948 /* Detect multiplication by a constant and convert it into a sequence of
2949    shifts, additions, subtractions and negations.  We reuse the
2950    choose_mult_variant algorithms from expmed.c.
2951
2952 Input/Output:
2953
2954 STMT_VINFO: The stmt from which the pattern search begins,
2955 i.e. the mult stmt.
2956
2957 Output:
2958
2959 * TYPE_OUT: The type of the output of this pattern.
2960
2961 * Return value: A new stmt that will be used to replace
2962 the multiplication. */
2963
2964 static gimple *
2965 vect_recog_mult_pattern (vec_info *vinfo,
2966 stmt_vec_info stmt_vinfo, tree *type_out)
2967 {
2968 gimple *last_stmt = stmt_vinfo->stmt;
2969 tree oprnd0, oprnd1, vectype, itype;
2970 gimple *pattern_stmt;
2971
2972 if (!is_gimple_assign (last_stmt))
2973 return NULL;
2974
2975 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2976 return NULL;
2977
2978 oprnd0 = gimple_assign_rhs1 (last_stmt);
2979 oprnd1 = gimple_assign_rhs2 (last_stmt);
2980 itype = TREE_TYPE (oprnd0);
2981
2982 if (TREE_CODE (oprnd0) != SSA_NAME
2983 || TREE_CODE (oprnd1) != INTEGER_CST
2984 || !INTEGRAL_TYPE_P (itype)
2985 || !type_has_mode_precision_p (itype))
2986 return NULL;
2987
2988 vectype = get_vectype_for_scalar_type (vinfo, itype);
2989 if (vectype == NULL_TREE)
2990 return NULL;
2991
2992 /* If the target can handle vectorized multiplication natively,
2993 don't attempt to optimize this. */
2994 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2995 if (mul_optab != unknown_optab)
2996 {
2997 machine_mode vec_mode = TYPE_MODE (vectype);
2998 int icode = (int) optab_handler (mul_optab, vec_mode);
2999 if (icode != CODE_FOR_nothing)
3000 return NULL;
3001 }
3002
3003 pattern_stmt = vect_synth_mult_by_constant (vinfo,
3004 oprnd0, oprnd1, stmt_vinfo);
3005 if (!pattern_stmt)
3006 return NULL;
3007
3008 /* Pattern detected. */
3009 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
3010
3011 *type_out = vectype;
3012
3013 return pattern_stmt;
3014 }
3015
3016 /* Detect a signed division by a constant that wouldn't be
3017 otherwise vectorized:
3018
3019 type a_t, b_t;
3020
3021 S1 a_t = b_t / N;
3022
3023 where type 'type' is an integral type and N is a constant.
3024
3025 Similarly handle modulo by a constant:
3026
3027 S4 a_t = b_t % N;
3028
3029 Input/Output:
3030
3031 * STMT_VINFO: The stmt from which the pattern search begins,
3032    i.e. the division stmt.  If N is a power of two constant and
3033    type is signed, S1 is replaced by:
3034 S3 y_t = b_t < 0 ? N - 1 : 0;
3035 S2 x_t = b_t + y_t;
3036 S1' a_t = x_t >> log2 (N);
3037
3038    If N is a power of two constant and type is signed, S4 is
3039    replaced by (where *_T temporaries have unsigned type):
3040 S9 y_T = b_t < 0 ? -1U : 0U;
3041 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3042 S7 z_t = (type) z_T;
3043 S6 w_t = b_t + z_t;
3044 S5 x_t = w_t & (N - 1);
3045 S4' a_t = x_t - z_t;
3046
3047 Output:
3048
3049 * TYPE_OUT: The type of the output of this pattern.
3050
3051 * Return value: A new stmt that will be used to replace the division
3052 S1 or modulo S4 stmt. */
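   /* Worked example (illustrative): for "int a_t = b_t / 4" the
      replacement is "a_t = (b_t + (b_t < 0 ? 3 : 0)) >> 2"; e.g.
      b_t = -7 gives (-7 + 3) >> 2 = -1, matching C's truncation
      towards zero.  */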
3053
3054 static gimple *
3055 vect_recog_divmod_pattern (vec_info *vinfo,
3056 stmt_vec_info stmt_vinfo, tree *type_out)
3057 {
3058 gimple *last_stmt = stmt_vinfo->stmt;
3059 tree oprnd0, oprnd1, vectype, itype, cond;
3060 gimple *pattern_stmt, *def_stmt;
3061 enum tree_code rhs_code;
3062 optab optab;
3063 tree q;
3064 int dummy_int, prec;
3065
3066 if (!is_gimple_assign (last_stmt))
3067 return NULL;
3068
3069 rhs_code = gimple_assign_rhs_code (last_stmt);
3070 switch (rhs_code)
3071 {
3072 case TRUNC_DIV_EXPR:
3073 case EXACT_DIV_EXPR:
3074 case TRUNC_MOD_EXPR:
3075 break;
3076 default:
3077 return NULL;
3078 }
3079
3080 oprnd0 = gimple_assign_rhs1 (last_stmt);
3081 oprnd1 = gimple_assign_rhs2 (last_stmt);
3082 itype = TREE_TYPE (oprnd0);
3083 if (TREE_CODE (oprnd0) != SSA_NAME
3084 || TREE_CODE (oprnd1) != INTEGER_CST
3085 || TREE_CODE (itype) != INTEGER_TYPE
3086 || !type_has_mode_precision_p (itype))
3087 return NULL;
3088
3089 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3090 vectype = get_vectype_for_scalar_type (vinfo, itype);
3091 if (vectype == NULL_TREE)
3092 return NULL;
3093
3094 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3095 {
3096 /* If the target can handle vectorized division or modulo natively,
3097 don't attempt to optimize this, since native division is likely
3098 to give smaller code. */
3099 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3100 if (optab != unknown_optab)
3101 {
3102 machine_mode vec_mode = TYPE_MODE (vectype);
3103 int icode = (int) optab_handler (optab, vec_mode);
3104 if (icode != CODE_FOR_nothing)
3105 return NULL;
3106 }
3107 }
3108
3109 prec = TYPE_PRECISION (itype);
3110 if (integer_pow2p (oprnd1))
3111 {
3112 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3113 return NULL;
3114
3115 /* Pattern detected. */
3116 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3117
3118 *type_out = vectype;
3119
3120 /* Check if the target supports this internal function. */
3121 internal_fn ifn = IFN_DIV_POW2;
3122 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3123 {
3124 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3125
3126 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3127 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3128 gimple_call_set_lhs (div_stmt, var_div);
3129
3130 if (rhs_code == TRUNC_MOD_EXPR)
3131 {
3132 append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
3133 def_stmt
3134 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3135 LSHIFT_EXPR, var_div, shift);
3136 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3137 pattern_stmt
3138 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3139 MINUS_EXPR, oprnd0,
3140 gimple_assign_lhs (def_stmt));
3141 }
3142 else
3143 pattern_stmt = div_stmt;
3144 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3145
3146 return pattern_stmt;
3147 }
3148
3149 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3150 build_int_cst (itype, 0));
3151 if (rhs_code == TRUNC_DIV_EXPR
3152 || rhs_code == EXACT_DIV_EXPR)
3153 {
3154 tree var = vect_recog_temp_ssa_var (itype, NULL);
3155 tree shift;
3156 def_stmt
3157 = gimple_build_assign (var, COND_EXPR, cond,
3158 fold_build2 (MINUS_EXPR, itype, oprnd1,
3159 build_int_cst (itype, 1)),
3160 build_int_cst (itype, 0));
3161 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3162 var = vect_recog_temp_ssa_var (itype, NULL);
3163 def_stmt
3164 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3165 gimple_assign_lhs (def_stmt));
3166 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3167
3168 shift = build_int_cst (itype, tree_log2 (oprnd1));
3169 pattern_stmt
3170 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3171 RSHIFT_EXPR, var, shift);
3172 }
3173 else
3174 {
3175 tree signmask;
3176 if (compare_tree_int (oprnd1, 2) == 0)
3177 {
3178 signmask = vect_recog_temp_ssa_var (itype, NULL);
3179 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3180 build_int_cst (itype, 1),
3181 build_int_cst (itype, 0));
3182 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3183 }
3184 else
3185 {
3186 tree utype
3187 = build_nonstandard_integer_type (prec, 1);
3188 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3189 tree shift
3190 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3191 - tree_log2 (oprnd1));
3192 tree var = vect_recog_temp_ssa_var (utype, NULL);
3193
3194 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3195 build_int_cst (utype, -1),
3196 build_int_cst (utype, 0));
3197 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3198 var = vect_recog_temp_ssa_var (utype, NULL);
3199 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3200 gimple_assign_lhs (def_stmt),
3201 shift);
3202 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3203 signmask = vect_recog_temp_ssa_var (itype, NULL);
3204 def_stmt
3205 = gimple_build_assign (signmask, NOP_EXPR, var);
3206 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3207 }
3208 def_stmt
3209 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3210 PLUS_EXPR, oprnd0, signmask);
3211 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3212 def_stmt
3213 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3214 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3215 fold_build2 (MINUS_EXPR, itype, oprnd1,
3216 build_int_cst (itype, 1)));
3217 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3218
3219 pattern_stmt
3220 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3221 MINUS_EXPR, gimple_assign_lhs (def_stmt),
3222 signmask);
3223 }
3224
3225 return pattern_stmt;
3226 }
3227
3228 if (prec > HOST_BITS_PER_WIDE_INT
3229 || integer_zerop (oprnd1))
3230 return NULL;
3231
3232 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3233 return NULL;
3234
3235 if (TYPE_UNSIGNED (itype))
3236 {
3237 unsigned HOST_WIDE_INT mh, ml;
3238 int pre_shift, post_shift;
3239 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3240 & GET_MODE_MASK (itype_mode));
3241 tree t1, t2, t3, t4;
3242
3243 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3244 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
3245 return NULL;
3246
3247 /* Find a suitable multiplier and right shift count
3248 instead of multiplying with D. */
3249 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3250
3251 /* If the suggested multiplier is more than SIZE bits, we can do better
3252 for even divisors, using an initial right shift. */
3253 if (mh != 0 && (d & 1) == 0)
3254 {
3255 pre_shift = ctz_or_zero (d);
3256 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3257 &ml, &post_shift, &dummy_int);
3258 gcc_assert (!mh);
3259 }
3260 else
3261 pre_shift = 0;
3262
3263 if (mh != 0)
3264 {
3265 if (post_shift - 1 >= prec)
3266 return NULL;
3267
3268 /* t1 = oprnd0 h* ml;
3269 t2 = oprnd0 - t1;
3270 t3 = t2 >> 1;
3271 t4 = t1 + t3;
3272 q = t4 >> (post_shift - 1); */
3273 t1 = vect_recog_temp_ssa_var (itype, NULL);
3274 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3275 build_int_cst (itype, ml));
3276 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3277
3278 t2 = vect_recog_temp_ssa_var (itype, NULL);
3279 def_stmt
3280 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3281 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3282
3283 t3 = vect_recog_temp_ssa_var (itype, NULL);
3284 def_stmt
3285 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3286 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3287
3288 t4 = vect_recog_temp_ssa_var (itype, NULL);
3289 def_stmt
3290 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3291
3292 if (post_shift != 1)
3293 {
3294 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3295
3296 q = vect_recog_temp_ssa_var (itype, NULL);
3297 pattern_stmt
3298 = gimple_build_assign (q, RSHIFT_EXPR, t4,
3299 build_int_cst (itype, post_shift - 1));
3300 }
3301 else
3302 {
3303 q = t4;
3304 pattern_stmt = def_stmt;
3305 }
3306 }
3307 else
3308 {
3309 if (pre_shift >= prec || post_shift >= prec)
3310 return NULL;
3311
3312 /* t1 = oprnd0 >> pre_shift;
3313 t2 = t1 h* ml;
3314 q = t2 >> post_shift; */
3315 if (pre_shift)
3316 {
3317 t1 = vect_recog_temp_ssa_var (itype, NULL);
3318 def_stmt
3319 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
3320 build_int_cst (NULL, pre_shift));
3321 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3322 }
3323 else
3324 t1 = oprnd0;
3325
3326 t2 = vect_recog_temp_ssa_var (itype, NULL);
3327 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
3328 build_int_cst (itype, ml));
3329
3330 if (post_shift)
3331 {
3332 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3333
3334 q = vect_recog_temp_ssa_var (itype, NULL);
3335 def_stmt
3336 = gimple_build_assign (q, RSHIFT_EXPR, t2,
3337 build_int_cst (itype, post_shift));
3338 }
3339 else
3340 q = t2;
3341
3342 pattern_stmt = def_stmt;
3343 }
3344 }
3345 else
3346 {
3347 unsigned HOST_WIDE_INT ml;
3348 int post_shift;
3349 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
3350 unsigned HOST_WIDE_INT abs_d;
3351 bool add = false;
3352 tree t1, t2, t3, t4;
3353
3354 /* Give up for -1. */
3355 if (d == -1)
3356 return NULL;
3357
3358 /* Since d might be INT_MIN, we have to cast to
3359 unsigned HOST_WIDE_INT before negating to avoid
3360 undefined signed overflow. */
3361 abs_d = (d >= 0
3362 ? (unsigned HOST_WIDE_INT) d
3363 : - (unsigned HOST_WIDE_INT) d);
3364
3365 /* n rem d = n rem -d */
3366 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3367 {
3368 d = abs_d;
3369 oprnd1 = build_int_cst (itype, abs_d);
3370 }
3371 if (HOST_BITS_PER_WIDE_INT >= prec
3372 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
3373 /* This case is not handled correctly below. */
3374 return NULL;
3375
3376 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
3377 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3378 {
3379 add = true;
3380 ml |= HOST_WIDE_INT_M1U << (prec - 1);
3381 }
3382 if (post_shift >= prec)
3383 return NULL;
3384
3385 /* t1 = oprnd0 h* ml; */
3386 t1 = vect_recog_temp_ssa_var (itype, NULL);
3387 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3388 build_int_cst (itype, ml));
3389
3390 if (add)
3391 {
3392 /* t2 = t1 + oprnd0; */
3393 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3394 t2 = vect_recog_temp_ssa_var (itype, NULL);
3395 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3396 }
3397 else
3398 t2 = t1;
3399
3400 if (post_shift)
3401 {
3402 /* t3 = t2 >> post_shift; */
3403 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3404 t3 = vect_recog_temp_ssa_var (itype, NULL);
3405 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3406 build_int_cst (itype, post_shift));
3407 }
3408 else
3409 t3 = t2;
3410
3411 wide_int oprnd0_min, oprnd0_max;
3412 int msb = 1;
3413 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
3414 {
3415 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
3416 msb = 0;
3417 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
3418 msb = -1;
3419 }
3420
3421 if (msb == 0 && d >= 0)
3422 {
3423 /* q = t3; */
3424 q = t3;
3425 pattern_stmt = def_stmt;
3426 }
3427 else
3428 {
3429 /* t4 = oprnd0 >> (prec - 1);
3430 or if we know from VRP that oprnd0 >= 0
3431 t4 = 0;
3432 or if we know from VRP that oprnd0 < 0
3433 t4 = -1; */
3434 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3435 t4 = vect_recog_temp_ssa_var (itype, NULL);
3436 if (msb != 1)
3437 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3438 build_int_cst (itype, msb));
3439 else
3440 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3441 build_int_cst (itype, prec - 1));
3442 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3443
3444 /* q = t3 - t4; or q = t4 - t3; */
3445 q = vect_recog_temp_ssa_var (itype, NULL);
3446 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3447 d < 0 ? t3 : t4);
3448 }
3449 }
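/* Illustrative example only (assuming a 32-bit signed type and
   oprnd1 == 3; the constants again come from choose_multiplier): the
   sequence built above corresponds to

     t1 = oprnd0 h* 0x55555556;
     q  = t1 - (oprnd0 >> 31);

   where subtracting the replicated sign bit adds 1 for negative oprnd0.  */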
3450
3451 if (rhs_code == TRUNC_MOD_EXPR)
3452 {
3453 tree r, t1;
3454
3455 /* We divided. Now finish by:
3456 t1 = q * oprnd1;
3457 r = oprnd0 - t1; */
3458 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
3459
3460 t1 = vect_recog_temp_ssa_var (itype, NULL);
3461 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3462 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3463
3464 r = vect_recog_temp_ssa_var (itype, NULL);
3465 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3466 }
3467
3468 /* Pattern detected. */
3469 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3470
3471 *type_out = vectype;
3472 return pattern_stmt;
3473 }
3474
3475 /* Function vect_recog_mixed_size_cond_pattern
3476
3477 Try to find the following pattern:
3478
3479 type x_t, y_t;
3480 TYPE a_T, b_T, c_T;
3481 loop:
3482 S1 a_T = x_t CMP y_t ? b_T : c_T;
3483
3484 where type 'TYPE' is an integral type which has different size
3485 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3486 than 'type', the constants need to fit into an integer type
3487 with the same width as 'type') or results of conversion from 'type'.
3488
3489 Input:
3490
3491 * STMT_VINFO: The stmt from which the pattern search begins.
3492
3493 Output:
3494
3495 * TYPE_OUT: The type of the output of this pattern.
3496
3497 * Return value: A new stmt that will be used to replace the pattern.
3498 Additionally a def_stmt is added.
3499
3500 a_it = x_t CMP y_t ? b_it : c_it;
3501 a_T = (TYPE) a_it; */
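/* A hypothetical concrete instance (not taken from the code below):

     short x_t, y_t;
     int a_T;
     a_T = x_t < y_t ? 17 : 42;

   compares shorts but produces an int, so the pattern instead performs the
   COND_EXPR in a 16-bit type and widens the result:

     a_it = x_t < y_t ? 17 : 42;
     a_T = (int) a_it;  */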
3502
3503 static gimple *
3504 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
3505 stmt_vec_info stmt_vinfo, tree *type_out)
3506 {
3507 gimple *last_stmt = stmt_vinfo->stmt;
3508 tree cond_expr, then_clause, else_clause;
3509 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3510 gimple *pattern_stmt, *def_stmt;
3511 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3512 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3513 bool promotion;
3514 tree comp_scalar_type;
3515
3516 if (!is_gimple_assign (last_stmt)
3517 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3518 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3519 return NULL;
3520
3521 cond_expr = gimple_assign_rhs1 (last_stmt);
3522 then_clause = gimple_assign_rhs2 (last_stmt);
3523 else_clause = gimple_assign_rhs3 (last_stmt);
3524
3525 if (!COMPARISON_CLASS_P (cond_expr))
3526 return NULL;
3527
3528 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3529 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
3530 if (comp_vectype == NULL_TREE)
3531 return NULL;
3532
3533 type = gimple_expr_type (last_stmt);
3534 if (types_compatible_p (type, comp_scalar_type)
3535 || ((TREE_CODE (then_clause) != INTEGER_CST
3536 || TREE_CODE (else_clause) != INTEGER_CST)
3537 && !INTEGRAL_TYPE_P (comp_scalar_type))
3538 || !INTEGRAL_TYPE_P (type))
3539 return NULL;
3540
3541 if ((TREE_CODE (then_clause) != INTEGER_CST
3542 && !type_conversion_p (vinfo, then_clause, false,
3543 &orig_type0, &def_stmt0, &promotion))
3544 || (TREE_CODE (else_clause) != INTEGER_CST
3545 && !type_conversion_p (vinfo, else_clause, false,
3546 &orig_type1, &def_stmt1, &promotion)))
3547 return NULL;
3548
3549 if (orig_type0 && orig_type1
3550 && !types_compatible_p (orig_type0, orig_type1))
3551 return NULL;
3552
3553 if (orig_type0)
3554 {
3555 if (!types_compatible_p (orig_type0, comp_scalar_type))
3556 return NULL;
3557 then_clause = gimple_assign_rhs1 (def_stmt0);
3558 itype = orig_type0;
3559 }
3560
3561 if (orig_type1)
3562 {
3563 if (!types_compatible_p (orig_type1, comp_scalar_type))
3564 return NULL;
3565 else_clause = gimple_assign_rhs1 (def_stmt1);
3566 itype = orig_type1;
3567 }
3568
3569
3570 HOST_WIDE_INT cmp_mode_size
3571 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3572
3573 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3574 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3575 return NULL;
3576
3577 vectype = get_vectype_for_scalar_type (vinfo, type);
3578 if (vectype == NULL_TREE)
3579 return NULL;
3580
3581 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3582 return NULL;
3583
3584 if (itype == NULL_TREE)
3585 itype = build_nonstandard_integer_type (cmp_mode_size,
3586 TYPE_UNSIGNED (type));
3587
3588 if (itype == NULL_TREE
3589 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3590 return NULL;
3591
3592 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3593 if (vecitype == NULL_TREE)
3594 return NULL;
3595
3596 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3597 return NULL;
3598
3599 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3600 {
3601 if ((TREE_CODE (then_clause) == INTEGER_CST
3602 && !int_fits_type_p (then_clause, itype))
3603 || (TREE_CODE (else_clause) == INTEGER_CST
3604 && !int_fits_type_p (else_clause, itype)))
3605 return NULL;
3606 }
3607
3608 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3609 COND_EXPR, unshare_expr (cond_expr),
3610 fold_convert (itype, then_clause),
3611 fold_convert (itype, else_clause));
3612 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3613 NOP_EXPR, gimple_assign_lhs (def_stmt));
3614
3615 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
3616 *type_out = vectype;
3617
3618 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3619
3620 return pattern_stmt;
3621 }
3622
3623
3624 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3625 true if bool VAR can and should be optimized that way. Assume it shouldn't
3626 in case it's a result of a comparison which can be directly vectorized into
3627 a vector comparison. Fills in STMTS with all stmts visited during the
3628 walk. */
3629
3630 static bool
3631 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3632 {
3633 tree rhs1;
3634 enum tree_code rhs_code;
3635
3636 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3637 if (!def_stmt_info)
3638 return false;
3639
3640 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3641 if (!def_stmt)
3642 return false;
3643
3644 if (stmts.contains (def_stmt))
3645 return true;
3646
3647 rhs1 = gimple_assign_rhs1 (def_stmt);
3648 rhs_code = gimple_assign_rhs_code (def_stmt);
3649 switch (rhs_code)
3650 {
3651 case SSA_NAME:
3652 if (! check_bool_pattern (rhs1, vinfo, stmts))
3653 return false;
3654 break;
3655
3656 CASE_CONVERT:
3657 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3658 return false;
3659 if (! check_bool_pattern (rhs1, vinfo, stmts))
3660 return false;
3661 break;
3662
3663 case BIT_NOT_EXPR:
3664 if (! check_bool_pattern (rhs1, vinfo, stmts))
3665 return false;
3666 break;
3667
3668 case BIT_AND_EXPR:
3669 case BIT_IOR_EXPR:
3670 case BIT_XOR_EXPR:
3671 if (! check_bool_pattern (rhs1, vinfo, stmts)
3672 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3673 return false;
3674 break;
3675
3676 default:
3677 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3678 {
3679 tree vecitype, comp_vectype;
3680
3681 /* If the comparison can throw, then is_gimple_condexpr will be
3682 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3683 if (stmt_could_throw_p (cfun, def_stmt))
3684 return false;
3685
3686 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
3687 if (comp_vectype == NULL_TREE)
3688 return false;
3689
3690 tree mask_type = get_mask_type_for_scalar_type (vinfo,
3691 TREE_TYPE (rhs1));
3692 if (mask_type
3693 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3694 return false;
3695
3696 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3697 {
3698 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3699 tree itype
3700 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3701 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3702 if (vecitype == NULL_TREE)
3703 return false;
3704 }
3705 else
3706 vecitype = comp_vectype;
3707 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3708 return false;
3709 }
3710 else
3711 return false;
3712 break;
3713 }
3714
3715 bool res = stmts.add (def_stmt);
3716 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3717 gcc_assert (!res);
3718
3719 return true;
3720 }
3721
3722
3723 /* Helper function of adjust_bool_pattern. Cast VAR (the result of a
3724 previously created pattern stmt) to TYPE, appending the cast stmt to
3725 STMT_INFO's pattern sequence, and return the cast's result. */
3726
3727 static tree
3728 adjust_bool_pattern_cast (vec_info *vinfo,
3729 tree type, tree var, stmt_vec_info stmt_info)
3730 {
3731 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3732 NOP_EXPR, var);
3733 append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
3734 get_vectype_for_scalar_type (vinfo, type));
3735 return gimple_assign_lhs (cast_stmt);
3736 }
3737
3738 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3739 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3740 type, OUT_TYPE is the desired final integer type of the whole pattern.
3741 STMT_INFO is the info of the pattern root and is where pattern stmts should
3742 be associated with. DEFS is a map of pattern defs. */
3743
3744 static void
3745 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
3746 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3747 {
3748 gimple *stmt = SSA_NAME_DEF_STMT (var);
3749 enum tree_code rhs_code, def_rhs_code;
3750 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3751 location_t loc;
3752 gimple *pattern_stmt, *def_stmt;
3753 tree trueval = NULL_TREE;
3754
3755 rhs1 = gimple_assign_rhs1 (stmt);
3756 rhs2 = gimple_assign_rhs2 (stmt);
3757 rhs_code = gimple_assign_rhs_code (stmt);
3758 loc = gimple_location (stmt);
3759 switch (rhs_code)
3760 {
3761 case SSA_NAME:
3762 CASE_CONVERT:
3763 irhs1 = *defs.get (rhs1);
3764 itype = TREE_TYPE (irhs1);
3765 pattern_stmt
3766 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3767 SSA_NAME, irhs1);
3768 break;
3769
3770 case BIT_NOT_EXPR:
3771 irhs1 = *defs.get (rhs1);
3772 itype = TREE_TYPE (irhs1);
3773 pattern_stmt
3774 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3775 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3776 break;
3777
3778 case BIT_AND_EXPR:
3779 /* Try to optimize x = y & (a < b ? 1 : 0); into
3780 x = (a < b ? y : 0);
3781
3782 E.g. for:
3783 bool a_b, b_b, c_b;
3784 TYPE d_T;
3785
3786 S1 a_b = x1 CMP1 y1;
3787 S2 b_b = x2 CMP2 y2;
3788 S3 c_b = a_b & b_b;
3789 S4 d_T = (TYPE) c_b;
3790
3791 we would normally emit:
3792
3793 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3794 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3795 S3' c_T = a_T & b_T;
3796 S4' d_T = c_T;
3797
3798 but we can save one stmt by using the
3799 result of one of the COND_EXPRs in the other COND_EXPR and leave
3800 the BIT_AND_EXPR stmt out:
3801
3802 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3803 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3804 S4' d_T = c_T;
3805
3806 At least when VEC_COND_EXPR is implemented using masks
3807 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3808 computes the comparison mask and ANDs it, in one case with an
3809 all-ones vector, in the other case with a vector register.
3810 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3811 often more expensive. */
3812 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3813 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3814 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3815 {
3816 irhs1 = *defs.get (rhs1);
3817 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3818 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3819 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3820 {
3821 rhs_code = def_rhs_code;
3822 rhs1 = def_rhs1;
3823 rhs2 = gimple_assign_rhs2 (def_stmt);
3824 trueval = irhs1;
3825 goto do_compare;
3826 }
3827 else
3828 irhs2 = *defs.get (rhs2);
3829 goto and_ior_xor;
3830 }
3831 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3832 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3833 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3834 {
3835 irhs2 = *defs.get (rhs2);
3836 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3837 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3838 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3839 {
3840 rhs_code = def_rhs_code;
3841 rhs1 = def_rhs1;
3842 rhs2 = gimple_assign_rhs2 (def_stmt);
3843 trueval = irhs2;
3844 goto do_compare;
3845 }
3846 else
3847 irhs1 = *defs.get (rhs1);
3848 goto and_ior_xor;
3849 }
3850 /* FALLTHRU */
3851 case BIT_IOR_EXPR:
3852 case BIT_XOR_EXPR:
3853 irhs1 = *defs.get (rhs1);
3854 irhs2 = *defs.get (rhs2);
3855 and_ior_xor:
3856 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3857 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3858 {
3859 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3860 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3861 int out_prec = TYPE_PRECISION (out_type);
3862 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3863 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
3864 stmt_info);
3865 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3866 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
3867 stmt_info);
3868 else
3869 {
3870 irhs1 = adjust_bool_pattern_cast (vinfo,
3871 out_type, irhs1, stmt_info);
3872 irhs2 = adjust_bool_pattern_cast (vinfo,
3873 out_type, irhs2, stmt_info);
3874 }
3875 }
3876 itype = TREE_TYPE (irhs1);
3877 pattern_stmt
3878 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3879 rhs_code, irhs1, irhs2);
3880 break;
3881
3882 default:
3883 do_compare:
3884 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3885 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3886 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3887 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3888 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3889 {
3890 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3891 itype
3892 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3893 }
3894 else
3895 itype = TREE_TYPE (rhs1);
3896 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3897 if (trueval == NULL_TREE)
3898 trueval = build_int_cst (itype, 1);
3899 else
3900 gcc_checking_assert (useless_type_conversion_p (itype,
3901 TREE_TYPE (trueval)));
3902 pattern_stmt
3903 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3904 COND_EXPR, cond_expr, trueval,
3905 build_int_cst (itype, 0));
3906 break;
3907 }
3908
3909 gimple_set_location (pattern_stmt, loc);
3910 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
3911 get_vectype_for_scalar_type (vinfo, itype));
3912 defs.put (var, gimple_assign_lhs (pattern_stmt));
3913 }
3914
3915 /* Comparison function to qsort a vector of gimple stmts by UID. */
3916
3917 static int
3918 sort_after_uid (const void *p1, const void *p2)
3919 {
3920 const gimple *stmt1 = *(const gimple * const *)p1;
3921 const gimple *stmt2 = *(const gimple * const *)p2;
3922 return gimple_uid (stmt1) - gimple_uid (stmt2);
3923 }
3924
3925 /* Create pattern stmts for all stmts participating in the bool pattern
3926 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
3927 OUT_TYPE. Return the def of the pattern root. */
3928
3929 static tree
3930 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
3931 tree out_type, stmt_vec_info stmt_info)
3932 {
3933 /* Gather original stmts in the bool pattern in their order of appearance
3934 in the IL. */
3935 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3936 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3937 i != bool_stmt_set.end (); ++i)
3938 bool_stmts.quick_push (*i);
3939 bool_stmts.qsort (sort_after_uid);
3940
3941 /* Now process them in that order, producing pattern stmts. */
3942 hash_map <tree, tree> defs;
3943 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3944 adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
3945 out_type, stmt_info, defs);
3946
3947 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3948 gimple *pattern_stmt
3949 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
3950 return gimple_assign_lhs (pattern_stmt);
3951 }
3952
3953 /* Return the proper type for converting bool VAR into
3954 an integer value or NULL_TREE if no such type exists.
3955 The type is chosen so that the converted value has the
3956 same number of elements as VAR's vector type. */
3957
3958 static tree
3959 integer_type_for_mask (tree var, vec_info *vinfo)
3960 {
3961 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3962 return NULL_TREE;
3963
3964 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3965 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
3966 return NULL_TREE;
3967
3968 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
3969 }
3970
3971 /* Function vect_recog_bool_pattern
3972
3973 Try to find pattern like following:
3974
3975 bool a_b, b_b, c_b, d_b, e_b;
3976 TYPE f_T;
3977 loop:
3978 S1 a_b = x1 CMP1 y1;
3979 S2 b_b = x2 CMP2 y2;
3980 S3 c_b = a_b & b_b;
3981 S4 d_b = x3 CMP3 y3;
3982 S5 e_b = c_b | d_b;
3983 S6 f_T = (TYPE) e_b;
3984
3985 where type 'TYPE' is an integral type. Or a similar pattern
3986 ending in
3987
3988 S6 f_Y = e_b ? r_Y : s_Y;
3989
3990 as results from if-conversion of a complex condition.
3991
3992 Input:
3993
3994 * STMT_VINFO: The stmt at the end from which the pattern
3995 search begins, i.e. cast of a bool to
3996 an integer type.
3997
3998 Output:
3999
4000 * TYPE_OUT: The type of the output of this pattern.
4001
4002 * Return value: A new stmt that will be used to replace the pattern.
4003
4004 Assuming the size of TYPE is the same as the size of all comparisons
4005 (otherwise some casts would be added where needed), for the above
4006 sequence we create the following related pattern stmts:
4007 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4008 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4009 S4' d_T = x3 CMP3 y3 ? 1 : 0;
4010 S5' e_T = c_T | d_T;
4011 S6' f_T = e_T;
4012
4013 Instead of the above S3' we could emit:
4014 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4015 S3' c_T = a_T | b_T;
4016 but the above is more efficient. */
4017
4018 static gimple *
4019 vect_recog_bool_pattern (vec_info *vinfo,
4020 stmt_vec_info stmt_vinfo, tree *type_out)
4021 {
4022 gimple *last_stmt = stmt_vinfo->stmt;
4023 enum tree_code rhs_code;
4024 tree var, lhs, rhs, vectype;
4025 gimple *pattern_stmt;
4026
4027 if (!is_gimple_assign (last_stmt))
4028 return NULL;
4029
4030 var = gimple_assign_rhs1 (last_stmt);
4031 lhs = gimple_assign_lhs (last_stmt);
4032
4033 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4034 return NULL;
4035
4036 hash_set<gimple *> bool_stmts;
4037
4038 rhs_code = gimple_assign_rhs_code (last_stmt);
4039 if (CONVERT_EXPR_CODE_P (rhs_code))
4040 {
4041 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4042 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
4043 return NULL;
4044 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4045 if (vectype == NULL_TREE)
4046 return NULL;
4047
4048 if (check_bool_pattern (var, vinfo, bool_stmts))
4049 {
4050 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4051 TREE_TYPE (lhs), stmt_vinfo);
4052 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4053 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4054 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4055 else
4056 pattern_stmt
4057 = gimple_build_assign (lhs, NOP_EXPR, rhs);
4058 }
4059 else
4060 {
4061 tree type = integer_type_for_mask (var, vinfo);
4062 tree cst0, cst1, tmp;
4063
4064 if (!type)
4065 return NULL;
4066
4067 /* We may directly use the cond with a narrowed type to avoid
4068 multiple cond exprs followed by result packing, and instead
4069 perform a single cond with a packed mask. In case
4070 of widening it is better to do the cond first and then extract
4071 the results. */
4072 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4073 type = TREE_TYPE (lhs);
4074
4075 cst0 = build_int_cst (type, 0);
4076 cst1 = build_int_cst (type, 1);
4077 tmp = vect_recog_temp_ssa_var (type, NULL);
4078 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4079
4080 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4081 {
4082 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
4083 append_pattern_def_seq (vinfo, stmt_vinfo,
4084 pattern_stmt, new_vectype);
4085
4086 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4087 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4088 }
4089 }
4090
4091 *type_out = vectype;
4092 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4093
4094 return pattern_stmt;
4095 }
4096 else if (rhs_code == COND_EXPR
4097 && TREE_CODE (var) == SSA_NAME)
4098 {
4099 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4100 if (vectype == NULL_TREE)
4101 return NULL;
4102
4103 /* Build a scalar type for the boolean result that when
4104 vectorized matches the vector type of the result in
4105 size and number of elements. */
4106 unsigned prec
4107 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4108 TYPE_VECTOR_SUBPARTS (vectype));
4109
4110 tree type
4111 = build_nonstandard_integer_type (prec,
4112 TYPE_UNSIGNED (TREE_TYPE (var)));
4113 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
4114 return NULL;
4115
4116 if (!check_bool_pattern (var, vinfo, bool_stmts))
4117 return NULL;
4118
4119 rhs = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
4120
4121 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4122 pattern_stmt
4123 = gimple_build_assign (lhs, COND_EXPR,
4124 build2 (NE_EXPR, boolean_type_node,
4125 rhs, build_int_cst (type, 0)),
4126 gimple_assign_rhs2 (last_stmt),
4127 gimple_assign_rhs3 (last_stmt));
4128 *type_out = vectype;
4129 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4130
4131 return pattern_stmt;
4132 }
4133 else if (rhs_code == SSA_NAME
4134 && STMT_VINFO_DATA_REF (stmt_vinfo))
4135 {
4136 stmt_vec_info pattern_stmt_info;
4137 tree nunits_vectype;
4138 if (!vect_get_vector_types_for_stmt (vinfo, stmt_vinfo, &vectype,
4139 &nunits_vectype)
4140 || !VECTOR_MODE_P (TYPE_MODE (vectype)))
4141 return NULL;
4142
4143 if (check_bool_pattern (var, vinfo, bool_stmts))
4144 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4145 TREE_TYPE (vectype), stmt_vinfo);
4146 else
4147 {
4148 tree type = integer_type_for_mask (var, vinfo);
4149 tree cst0, cst1, new_vectype;
4150
4151 if (!type)
4152 return NULL;
4153
4154 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4155 type = TREE_TYPE (vectype);
4156
4157 cst0 = build_int_cst (type, 0);
4158 cst1 = build_int_cst (type, 1);
4159 new_vectype = get_vectype_for_scalar_type (vinfo, type);
4160
4161 rhs = vect_recog_temp_ssa_var (type, NULL);
4162 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4163 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
4164 }
4165
4166 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4167 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4168 {
4169 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4170 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4171 append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
4172 rhs = rhs2;
4173 }
4174 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4175 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4176 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4177 *type_out = vectype;
4178 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4179
4180 return pattern_stmt;
4181 }
4182 else
4183 return NULL;
4184 }
4185
4186
4187 /* A helper for vect_recog_mask_conversion_pattern. Build
4188 conversion of MASK to a type suitable for masking VECTYPE.
4189 The built statement is given the required vectype and is appended
4190 to STMT_VINFO's pattern sequence.
4191
4192 Return converted mask. */
4193
4194 static tree
4195 build_mask_conversion (vec_info *vinfo,
4196 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4197 {
4198 gimple *stmt;
4199 tree masktype, tmp;
4200
4201 masktype = truth_type_for (vectype);
4202 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4203 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4204 append_pattern_def_seq (vinfo, stmt_vinfo,
4205 stmt, masktype, TREE_TYPE (vectype));
4206
4207 return tmp;
4208 }
4209
4210
4211 /* Function vect_recog_mask_conversion_pattern
4212
4213 Try to find statements which require boolean type
4214 conversion. Additional conversion statements are
4215 added to handle such cases. For example:
4216
4217 bool m_1, m_2, m_3;
4218 int i_4, i_5;
4219 double d_6, d_7;
4220 char c_1, c_2, c_3;
4221
4222 S1 m_1 = i_4 > i_5;
4223 S2 m_2 = d_6 < d_7;
4224 S3 m_3 = m_1 & m_2;
4225 S4 c_1 = m_3 ? c_2 : c_3;
4226
4227 Will be transformed into:
4228
4229 S1 m_1 = i_4 > i_5;
4230 S2 m_2 = d_6 < d_7;
4231 S3'' m_2' = (_Bool[bitsize=32])m_2
4232 S3' m_3' = m_1 & m_2';
4233 S4'' m_3'' = (_Bool[bitsize=8])m_3'
4234 S4' c_1' = m_3'' ? c_2 : c_3; */
4235
4236 static gimple *
4237 vect_recog_mask_conversion_pattern (vec_info *vinfo,
4238 stmt_vec_info stmt_vinfo, tree *type_out)
4239 {
4240 gimple *last_stmt = stmt_vinfo->stmt;
4241 enum tree_code rhs_code;
4242 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4243 tree vectype1, vectype2;
4244 stmt_vec_info pattern_stmt_info;
4245
4246 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
4247 if (is_gimple_call (last_stmt)
4248 && gimple_call_internal_p (last_stmt))
4249 {
4250 gcall *pattern_stmt;
4251
4252 internal_fn ifn = gimple_call_internal_fn (last_stmt);
4253 int mask_argno = internal_fn_mask_index (ifn);
4254 if (mask_argno < 0)
4255 return NULL;
4256
4257 bool store_p = internal_store_fn_p (ifn);
4258 if (store_p)
4259 {
4260 int rhs_index = internal_fn_stored_value_index (ifn);
4261 tree rhs = gimple_call_arg (last_stmt, rhs_index);
4262 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
4263 }
4264 else
4265 {
4266 lhs = gimple_call_lhs (last_stmt);
4267 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4268 }
4269
4270 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4271 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
4272 if (!mask_arg_type)
4273 return NULL;
4274 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
4275
4276 if (!vectype1 || !vectype2
4277 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4278 TYPE_VECTOR_SUBPARTS (vectype2)))
4279 return NULL;
4280
4281 tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
4282
4283 auto_vec<tree, 8> args;
4284 unsigned int nargs = gimple_call_num_args (last_stmt);
4285 args.safe_grow (nargs);
4286 for (unsigned int i = 0; i < nargs; ++i)
4287 args[i] = ((int) i == mask_argno
4288 ? tmp
4289 : gimple_call_arg (last_stmt, i));
4290 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
4291
4292 if (!store_p)
4293 {
4294 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4295 gimple_call_set_lhs (pattern_stmt, lhs);
4296 }
4297 gimple_call_set_nothrow (pattern_stmt, true);
4298
4299 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4300 if (STMT_VINFO_DATA_REF (stmt_vinfo))
4301 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4302
4303 *type_out = vectype1;
4304 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4305
4306 return pattern_stmt;
4307 }
4308
4309 if (!is_gimple_assign (last_stmt))
4310 return NULL;
4311
4312 gimple *pattern_stmt;
4313 lhs = gimple_assign_lhs (last_stmt);
4314 rhs1 = gimple_assign_rhs1 (last_stmt);
4315 rhs_code = gimple_assign_rhs_code (last_stmt);
4316
4317 /* Check for cond expression requiring mask conversion. */
4318 if (rhs_code == COND_EXPR)
4319 {
4320 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4321
4322 if (TREE_CODE (rhs1) == SSA_NAME)
4323 {
4324 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4325 if (!rhs1_type)
4326 return NULL;
4327 }
4328 else if (COMPARISON_CLASS_P (rhs1))
4329 {
4330 /* Check whether we're comparing scalar booleans and (if so)
4331 whether a better mask type exists than the mask associated
4332 with boolean-sized elements. This avoids unnecessary packs
4333 and unpacks if the booleans are set from comparisons of
4334 wider types. E.g. in:
4335
4336 int x1, x2, x3, x4, y1, y2;
4337 ...
4338 bool b1 = (x1 == x2);
4339 bool b2 = (x3 == x4);
4340 ... = b1 == b2 ? y1 : y2;
4341
4342 it is better for b1 and b2 to use the mask type associated
4343 with int elements rather than bool (byte) elements. */
4344 rhs1_type = integer_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4345 if (!rhs1_type)
4346 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4347 }
4348 else
4349 return NULL;
4350
4351 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4352
4353 if (!vectype1 || !vectype2)
4354 return NULL;
4355
4356 /* Continue if a conversion is needed. Also continue if we have
4357 a comparison whose vector type would normally be different from
4358 VECTYPE2 when considered in isolation. In that case we'll
4359 replace the comparison with an SSA name (so that we can record
4360 its vector type) and behave as though the comparison was an SSA
4361 name from the outset. */
4362 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4363 TYPE_VECTOR_SUBPARTS (vectype2))
4364 && (TREE_CODE (rhs1) == SSA_NAME
4365 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4366 return NULL;
4367
4368 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4369 in place; we can handle it in vectorizable_condition. This avoids
4370 unnecessary promotion stmts and an increased vectorization factor. */
4371 if (COMPARISON_CLASS_P (rhs1)
4372 && INTEGRAL_TYPE_P (rhs1_type)
4373 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4374 TYPE_VECTOR_SUBPARTS (vectype2)))
4375 {
4376 enum vect_def_type dt;
4377 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4378 && dt == vect_external_def
4379 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4380 && (dt == vect_external_def
4381 || dt == vect_constant_def))
4382 {
4383 tree wide_scalar_type = build_nonstandard_integer_type
4384 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
4385 tree vectype3 = get_vectype_for_scalar_type (vinfo,
4386 wide_scalar_type);
4387 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4388 return NULL;
4389 }
4390 }
4391
4392 /* If rhs1 is a comparison we need to move it into a
4393 separate statement. */
4394 if (TREE_CODE (rhs1) != SSA_NAME)
4395 {
4396 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4397 pattern_stmt = gimple_build_assign (tmp, rhs1);
4398 rhs1 = tmp;
4399 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
4400 rhs1_type);
4401 }
4402
4403 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4404 TYPE_VECTOR_SUBPARTS (vectype2)))
4405 tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4406 else
4407 tmp = rhs1;
4408
4409 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4410 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4411 gimple_assign_rhs2 (last_stmt),
4412 gimple_assign_rhs3 (last_stmt));
4413
4414 *type_out = vectype1;
4415 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4416
4417 return pattern_stmt;
4418 }
4419
4420 /* Now check for binary boolean operations requiring conversion for
4421 one of the operands. */
4422 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4423 return NULL;
4424
4425 if (rhs_code != BIT_IOR_EXPR
4426 && rhs_code != BIT_XOR_EXPR
4427 && rhs_code != BIT_AND_EXPR
4428 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4429 return NULL;
4430
4431 rhs2 = gimple_assign_rhs2 (last_stmt);
4432
4433 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4434 rhs2_type = integer_type_for_mask (rhs2, vinfo);
4435
4436 if (!rhs1_type || !rhs2_type
4437 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4438 return NULL;
4439
4440 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4441 {
4442 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4443 if (!vectype1)
4444 return NULL;
4445 rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
4446 }
4447 else
4448 {
4449 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
4450 if (!vectype1)
4451 return NULL;
4452 rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4453 }
4454
4455 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4456 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4457
4458 *type_out = vectype1;
4459 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4460
4461 return pattern_stmt;
4462 }
4463
4464 /* STMT_INFO is a load or store. If the load or store is conditional, return
4465 the boolean condition under which it occurs, otherwise return null. */
4466
4467 static tree
4468 vect_get_load_store_mask (stmt_vec_info stmt_info)
4469 {
4470 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4471 {
4472 gcc_assert (gimple_assign_single_p (def_assign));
4473 return NULL_TREE;
4474 }
4475
4476 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4477 {
4478 internal_fn ifn = gimple_call_internal_fn (def_call);
4479 int mask_index = internal_fn_mask_index (ifn);
4480 return gimple_call_arg (def_call, mask_index);
4481 }
4482
4483 gcc_unreachable ();
4484 }
4485
4486 /* Return MASK if MASK is suitable for masking an operation on vectors
4487 of type VECTYPE, otherwise convert it into such a form and return
4488 the result. Associate any conversion statements with STMT_INFO's
4489 pattern. */
4490
4491 static tree
4492 vect_convert_mask_for_vectype (tree mask, tree vectype,
4493 stmt_vec_info stmt_info, vec_info *vinfo)
4494 {
4495 tree mask_type = integer_type_for_mask (mask, vinfo);
4496 if (mask_type)
4497 {
4498 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
4499 if (mask_vectype
4500 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4501 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4502 mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
4503 }
4504 return mask;
4505 }
4506
4507 /* Return the equivalent of:
4508
4509 fold_convert (TYPE, VALUE)
4510
4511 with the expectation that the operation will be vectorized.
4512 If new statements are needed, add them as pattern statements
4513 to STMT_INFO. */
4514
4515 static tree
4516 vect_add_conversion_to_pattern (vec_info *vinfo,
4517 tree type, tree value, stmt_vec_info stmt_info)
4518 {
4519 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4520 return value;
4521
4522 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4523 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4524 append_pattern_def_seq (vinfo, stmt_info, conversion,
4525 get_vectype_for_scalar_type (vinfo, type));
4526 return new_value;
4527 }
4528
4529 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4530 internal function. Return the final statement on success and set
4531 *TYPE_OUT to the vector type being loaded or stored.
4532
4533 This function only handles gathers and scatters that were recognized
4534 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
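/* For illustration (assuming the target provides IFN_MASK_GATHER_LOAD), a
   conditional gather load such as

     x_1 = c_2 ? base_3[off_4] : 0;

   would be replaced by a pattern statement along the lines of

     x_1 = .MASK_GATHER_LOAD (base_3, off_4, scale, 0, c_2);

   with the operands in the order built below.  */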
4535
4536 static gimple *
4537 vect_recog_gather_scatter_pattern (vec_info *vinfo,
4538 stmt_vec_info stmt_info, tree *type_out)
4539 {
4540 /* Currently we only support this for loop vectorization. */
4541 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4542 if (!loop_vinfo)
4543 return NULL;
4544
4545 /* Make sure that we're looking at a gather load or scatter store. */
4546 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4547 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4548 return NULL;
4549
4550 /* Get the boolean that controls whether the load or store happens.
4551 This is null if the operation is unconditional. */
4552 tree mask = vect_get_load_store_mask (stmt_info);
4553
4554 /* Make sure that the target supports an appropriate internal
4555 function for the gather/scatter operation. */
4556 gather_scatter_info gs_info;
4557 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4558 || gs_info.decl)
4559 return NULL;
4560
4561 /* Convert the mask to the right form. */
4562 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
4563 gs_info.element_type);
4564 if (mask)
4565 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4566 loop_vinfo);
4567
4568 /* Get the invariant base and non-invariant offset, converting the
4569 latter to the same width as the vector elements. */
4570 tree base = gs_info.base;
4571 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
4572 tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
4573 gs_info.offset, stmt_info);
4574
4575 /* Build the new pattern statement. */
4576 tree scale = size_int (gs_info.scale);
4577 gcall *pattern_stmt;
4578 if (DR_IS_READ (dr))
4579 {
4580 tree zero = build_zero_cst (gs_info.element_type);
4581 if (mask != NULL)
4582 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
4583 offset, scale, zero, mask);
4584 else
4585 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4586 offset, scale, zero);
4587 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4588 gimple_call_set_lhs (pattern_stmt, load_lhs);
4589 }
4590 else
4591 {
4592 tree rhs = vect_get_store_rhs (stmt_info);
4593 if (mask != NULL)
4594 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4595 base, offset, scale, rhs,
4596 mask);
4597 else
4598 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4599 base, offset, scale, rhs);
4600 }
4601 gimple_call_set_nothrow (pattern_stmt, true);
4602
4603 /* Copy across relevant vectorization info and associate DR with the
4604 new pattern statement instead of the original statement. */
4605 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4606 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4607
4608 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4609 *type_out = vectype;
4610 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4611
4612 return pattern_stmt;
4613 }
4614
4615 /* Return true if TYPE is a non-boolean integer type. These are the types
4616 that we want to consider for narrowing. */
4617
4618 static bool
4619 vect_narrowable_type_p (tree type)
4620 {
4621 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4622 }
4623
4624 /* Return true if the operation given by CODE can be truncated to N bits
4625 when only N bits of the output are needed. This is only true if bits N
4626 and above of the inputs have no effect on the low N bits of the result. */
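/* For example, the low N bits of a PLUS_EXPR or MULT_EXPR depend only on the
   low N bits of its operands, whereas the low N bits of an RSHIFT_EXPR or
   TRUNC_DIV_EXPR also depend on higher input bits.  */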
4627
4628 static bool
4629 vect_truncatable_operation_p (tree_code code)
4630 {
4631 switch (code)
4632 {
4633 case PLUS_EXPR:
4634 case MINUS_EXPR:
4635 case MULT_EXPR:
4636 case BIT_AND_EXPR:
4637 case BIT_IOR_EXPR:
4638 case BIT_XOR_EXPR:
4639 case COND_EXPR:
4640 return true;
4641
4642 default:
4643 return false;
4644 }
4645 }
4646
4647 /* Record that STMT_INFO could be changed from operating on TYPE to
4648 operating on a type with the precision and sign given by PRECISION
4649 and SIGN respectively. PRECISION is an arbitrary bit precision;
4650 it might not be a whole number of bytes. */
4651
4652 static void
4653 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4654 unsigned int precision, signop sign)
4655 {
4656 /* Round the precision up to a whole number of bytes. */
4657 precision = vect_element_precision (precision);
4658 if (precision < TYPE_PRECISION (type)
4659 && (!stmt_info->operation_precision
4660 || stmt_info->operation_precision > precision))
4661 {
4662 stmt_info->operation_precision = precision;
4663 stmt_info->operation_sign = sign;
4664 }
4665 }
4666
4667 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4668 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4669 is an arbitrary bit precision; it might not be a whole number of bytes. */
4670
4671 static void
4672 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4673 unsigned int min_input_precision)
4674 {
4675 /* This operation in isolation only requires the inputs to have
4676 MIN_INPUT_PRECISION bits of precision. However, that doesn't mean
4677 that MIN_INPUT_PRECISION is a natural precision for the chain
4678 as a whole. E.g. consider something like:
4679
4680 unsigned short *x, *y;
4681 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4682
4683 The right shift can be done on unsigned chars, and only requires the
4684 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4685 approach would mean turning a natural chain of single-vector unsigned
4686 short operations into one that truncates "*x" and then extends
4687 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4688 operation and one vector for each unsigned char operation.
4689 This would be a significant pessimization.
4690
4691 Instead only propagate the maximum of this precision and the precision
4692 required by the users of the result. This means that we don't pessimize
4693 the case above but continue to optimize things like:
4694
4695 unsigned char *y;
4696 unsigned short *x;
4697 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4698
4699 Here we would truncate two vectors of *x to a single vector of
4700 unsigned chars and use single-vector unsigned char operations for
4701 everything else, rather than doing two unsigned short copies of
4702 "(*x & 0xf0) >> 4" and then truncating the result. */
4703 min_input_precision = MAX (min_input_precision,
4704 stmt_info->min_output_precision);
4705
4706 if (min_input_precision < TYPE_PRECISION (type)
4707 && (!stmt_info->min_input_precision
4708 || stmt_info->min_input_precision > min_input_precision))
4709 stmt_info->min_input_precision = min_input_precision;
4710 }
4711
4712 /* Subroutine of vect_determine_min_output_precision. Return true if
4713 we can calculate a reduced number of output bits for STMT_INFO,
4714 whose result is LHS. */
4715
4716 static bool
4717 vect_determine_min_output_precision_1 (vec_info *vinfo,
4718 stmt_vec_info stmt_info, tree lhs)
4719 {
4720 /* Take the maximum precision required by users of the result. */
4721 unsigned int precision = 0;
4722 imm_use_iterator iter;
4723 use_operand_p use;
4724 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4725 {
4726 gimple *use_stmt = USE_STMT (use);
4727 if (is_gimple_debug (use_stmt))
4728 continue;
4729 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4730 if (!use_stmt_info || !use_stmt_info->min_input_precision)
4731 return false;
4732 /* The input precision recorded for COND_EXPRs applies only to the
4733 "then" and "else" values. */
4734 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
4735 if (assign
4736 && gimple_assign_rhs_code (assign) == COND_EXPR
4737 && use->use != gimple_assign_rhs2_ptr (assign)
4738 && use->use != gimple_assign_rhs3_ptr (assign))
4739 return false;
4740 precision = MAX (precision, use_stmt_info->min_input_precision);
4741 }
4742
4743 if (dump_enabled_p ())
4744 dump_printf_loc (MSG_NOTE, vect_location,
4745 "only the low %d bits of %T are significant\n",
4746 precision, lhs);
4747 stmt_info->min_output_precision = precision;
4748 return true;
4749 }
4750
4751 /* Calculate min_output_precision for STMT_INFO. */
4752
4753 static void
4754 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
4755 {
4756 /* We're only interested in statements with a narrowable result. */
4757 tree lhs = gimple_get_lhs (stmt_info->stmt);
4758 if (!lhs
4759 || TREE_CODE (lhs) != SSA_NAME
4760 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4761 return;
4762
4763 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
4764 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4765 }
4766
4767 /* Use range information to decide whether STMT (described by STMT_INFO)
4768 could be done in a narrower type. This is effectively a forward
4769 propagation, since it uses context-independent information that applies
4770 to all users of an SSA name. */
4771
4772 static void
4773 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4774 {
4775 tree lhs = gimple_assign_lhs (stmt);
4776 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4777 return;
4778
4779 tree type = TREE_TYPE (lhs);
4780 if (!vect_narrowable_type_p (type))
4781 return;
4782
4783 /* First see whether we have any useful range information for the result. */
4784 unsigned int precision = TYPE_PRECISION (type);
4785 signop sign = TYPE_SIGN (type);
4786 wide_int min_value, max_value;
4787 if (!vect_get_range_info (lhs, &min_value, &max_value))
4788 return;
4789
4790 tree_code code = gimple_assign_rhs_code (stmt);
4791 unsigned int nops = gimple_num_ops (stmt);
4792
4793 if (!vect_truncatable_operation_p (code))
4794 /* Check that all relevant input operands are compatible, and update
4795 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4796 for (unsigned int i = 1; i < nops; ++i)
4797 {
4798 tree op = gimple_op (stmt, i);
4799 if (TREE_CODE (op) == INTEGER_CST)
4800 {
4801 /* Don't require the integer to have TYPE (which it might
4802 not for things like shift amounts, etc.), but do require it
4803 to fit the type. */
4804 if (!int_fits_type_p (op, type))
4805 return;
4806
4807 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4808 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4809 }
4810 else if (TREE_CODE (op) == SSA_NAME)
4811 {
4812 /* Ignore codes that don't take uniform arguments. */
4813 if (!types_compatible_p (TREE_TYPE (op), type))
4814 return;
4815
4816 wide_int op_min_value, op_max_value;
4817 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4818 return;
4819
4820 min_value = wi::min (min_value, op_min_value, sign);
4821 max_value = wi::max (max_value, op_max_value, sign);
4822 }
4823 else
4824 return;
4825 }
4826
4827 /* Try to switch signed types for unsigned types if we can.
4828 This is better for two reasons. First, unsigned ops tend
4829 to be cheaper than signed ops. Second, it means that we can
4830 handle things like:
4831
4832 signed char c;
4833 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4834
4835 as:
4836
4837 signed char c;
4838 unsigned short res_1 = (unsigned short) c & 0xff00;
4839 int res = (int) res_1;
4840
4841 where the intermediate result res_1 has unsigned rather than
4842 signed type. */
4843 if (sign == SIGNED && !wi::neg_p (min_value))
4844 sign = UNSIGNED;
4845
4846 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4847 unsigned int precision1 = wi::min_precision (min_value, sign);
4848 unsigned int precision2 = wi::min_precision (max_value, sign);
4849 unsigned int value_precision = MAX (precision1, precision2);
4850 if (value_precision >= precision)
4851 return;
4852
4853 if (dump_enabled_p ())
4854 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4855 " without loss of precision: %G",
4856 sign == SIGNED ? "signed" : "unsigned",
4857 value_precision, stmt);
4858
4859 vect_set_operation_type (stmt_info, type, value_precision, sign);
4860 vect_set_min_input_precision (stmt_info, type, value_precision);
4861 }
4862
4863 /* Use information about the users of STMT's result to decide whether
4864 STMT (described by STMT_INFO) could be done in a narrower type.
4865 This is effectively a backward propagation. */
4866
4867 static void
4868 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4869 {
4870 tree_code code = gimple_assign_rhs_code (stmt);
4871 unsigned int opno = (code == COND_EXPR ? 2 : 1);
4872 tree type = TREE_TYPE (gimple_op (stmt, opno));
4873 if (!vect_narrowable_type_p (type))
4874 return;
4875
4876 unsigned int precision = TYPE_PRECISION (type);
4877 unsigned int operation_precision, min_input_precision;
4878 switch (code)
4879 {
4880 CASE_CONVERT:
4881 /* Only the bits that contribute to the output matter. Don't change
4882 the precision of the operation itself. */
4883 operation_precision = precision;
4884 min_input_precision = stmt_info->min_output_precision;
4885 break;
4886
4887 case LSHIFT_EXPR:
4888 case RSHIFT_EXPR:
4889 {
4890 tree shift = gimple_assign_rhs2 (stmt);
4891 if (TREE_CODE (shift) != INTEGER_CST
4892 || !wi::ltu_p (wi::to_widest (shift), precision))
4893 return;
4894 unsigned int const_shift = TREE_INT_CST_LOW (shift);
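/* For example, if only the low 8 bits of the result are needed and
   CONST_SHIFT is 3, an LSHIFT_EXPR needs only the low 5 bits of its input,
   while an RSHIFT_EXPR has to be computed in at least 11 bits.  */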
4895 if (code == LSHIFT_EXPR)
4896 {
4897 /* We need CONST_SHIFT fewer bits of the input. */
4898 operation_precision = stmt_info->min_output_precision;
4899 min_input_precision = (MAX (operation_precision, const_shift)
4900 - const_shift);
4901 }
4902 else
4903 {
4904 /* We need CONST_SHIFT extra bits to do the operation. */
4905 operation_precision = (stmt_info->min_output_precision
4906 + const_shift);
4907 min_input_precision = operation_precision;
4908 }
4909 break;
4910 }
4911
4912 default:
4913 if (vect_truncatable_operation_p (code))
4914 {
4915 /* Input bit N has no effect on output bits N-1 and lower. */
4916 operation_precision = stmt_info->min_output_precision;
4917 min_input_precision = operation_precision;
4918 break;
4919 }
4920 return;
4921 }
4922
4923 if (operation_precision < precision)
4924 {
4925 if (dump_enabled_p ())
4926 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4927 " without affecting users: %G",
4928 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
4929 operation_precision, stmt);
4930 vect_set_operation_type (stmt_info, type, operation_precision,
4931 TYPE_SIGN (type));
4932 }
4933 vect_set_min_input_precision (stmt_info, type, min_input_precision);
4934 }
4935
4936 /* Return true if the statement described by STMT_INFO sets a boolean
4937 SSA_NAME and if we know how to vectorize this kind of statement using
4938 vector mask types. */
4939
4940 static bool
4941 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
4942 {
4943 tree lhs = gimple_get_lhs (stmt_info->stmt);
4944 if (!lhs
4945 || TREE_CODE (lhs) != SSA_NAME
4946 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4947 return false;
4948
4949 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
4950 {
4951 tree_code rhs_code = gimple_assign_rhs_code (assign);
4952 switch (rhs_code)
4953 {
4954 CASE_CONVERT:
4955 case SSA_NAME:
4956 case BIT_NOT_EXPR:
4957 case BIT_IOR_EXPR:
4958 case BIT_XOR_EXPR:
4959 case BIT_AND_EXPR:
4960 return true;
4961
4962 default:
4963 return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
4964 }
4965 }
4966 return false;
4967 }
4968
4969 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
4970 a vector mask type instead of a normal vector type. Record the
4971 result in STMT_INFO->mask_precision. */
4972
4973 static void
4974 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
4975 {
4976 if (!possible_vector_mask_operation_p (stmt_info)
4977 || stmt_info->mask_precision)
4978 return;
4979
4980 auto_vec<stmt_vec_info, 32> worklist;
4981 worklist.quick_push (stmt_info);
4982 while (!worklist.is_empty ())
4983 {
4984 stmt_info = worklist.last ();
4985 unsigned int orig_length = worklist.length ();
4986
4987 /* If at least one boolean input uses a vector mask type,
4988 pick the mask type with the narrowest elements.
4989
4990 ??? This is the traditional behavior. It should always produce
4991 the smallest number of operations, but isn't necessarily the
4992 optimal choice. For example, if we have:
4993
4994 a = b & c
4995
4996 where:
4997
4998 - the user of a wants it to have a mask type for 16-bit elements (M16)
4999 - b also uses M16
5000 - c uses a mask type for 8-bit elements (M8)
5001
5002 then picking M8 gives:
5003
5004 - 1 M16->M8 pack for b
5005 - 1 M8 AND for a
5006 - 2 M8->M16 unpacks for the user of a
5007
5008 whereas picking M16 would have given:
5009
5010 - 2 M8->M16 unpacks for c
5011 - 2 M16 ANDs for a
5012
5013 The number of operations is the same, but M16 would have given
5014 a shorter dependency chain and allowed more ILP. */
5015 unsigned int precision = ~0U;
5016 gassign *assign = as_a <gassign *> (stmt_info->stmt);
5017 unsigned int nops = gimple_num_ops (assign);
5018 for (unsigned int i = 1; i < nops; ++i)
5019 {
5020 tree rhs = gimple_op (assign, i);
5021 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
5022 continue;
5023
5024 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5025 if (!def_stmt_info)
5026 /* Don't let external or constant operands influence the choice.
5027 We can convert them to whichever vector type we pick. */
5028 continue;
5029
5030 if (def_stmt_info->mask_precision)
5031 {
5032 if (precision > def_stmt_info->mask_precision)
5033 precision = def_stmt_info->mask_precision;
5034 }
5035 else if (possible_vector_mask_operation_p (def_stmt_info))
5036 worklist.safe_push (def_stmt_info);
5037 }
5038
5039 /* Defer the choice if we need to visit operands first. */
5040 if (orig_length != worklist.length ())
5041 continue;
5042
5043 /* If the statement compares two values that shouldn't use vector masks,
5044 try comparing the values as normal scalars instead. */
5045 tree_code rhs_code = gimple_assign_rhs_code (assign);
5046 if (precision == ~0U
5047 && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5048 {
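	  /* For example (illustrative): for _1 = a_2 < b_3 with 32-bit
	     integer operands, and with no boolean input having forced a
	     mask precision, this picks a precision of 32 (provided the
	     target can compare such vectors), so that the comparison is
	     done on full-width integer vectors.  */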
5049 tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
5050 scalar_mode mode;
5051 tree vectype, mask_type;
5052 if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
5053 && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
5054 && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
5055 && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
5056 precision = GET_MODE_BITSIZE (mode);
5057 }
5058
5059 if (dump_enabled_p ())
5060 {
5061 if (precision == ~0U)
5062 dump_printf_loc (MSG_NOTE, vect_location,
5063 "using normal nonmask vectors for %G",
5064 stmt_info->stmt);
5065 else
5066 dump_printf_loc (MSG_NOTE, vect_location,
5067 "using boolean precision %d for %G",
5068 precision, stmt_info->stmt);
5069 }
5070
5071 stmt_info->mask_precision = precision;
5072 worklist.pop ();
5073 }
5074 }
5075
5076 /* Handle vect_determine_precisions for STMT_INFO, given that we
5077 have already done so for the users of its result. */
5078
5079 void
5080 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
5081 {
5082 vect_determine_min_output_precision (vinfo, stmt_info);
5083 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
5084 {
5085 vect_determine_precisions_from_range (stmt_info, stmt);
5086 vect_determine_precisions_from_users (stmt_info, stmt);
5087 }
5088 vect_determine_mask_precision (vinfo, stmt_info);
5089 }
5090
5091 /* Walk backwards through the vectorizable region to determine the
5092 values of these fields:
5093
5094 - min_output_precision
5095 - min_input_precision
5096 - operation_precision
5097 - operation_sign. */
5098
5099 void
5100 vect_determine_precisions (vec_info *vinfo)
5101 {
5102 DUMP_VECT_SCOPE ("vect_determine_precisions");
5103
5104 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5105 {
5106 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5107 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5108 unsigned int nbbs = loop->num_nodes;
5109
5110 for (unsigned int i = 0; i < nbbs; i++)
5111 {
5112 basic_block bb = bbs[nbbs - i - 1];
5113 for (gimple_stmt_iterator si = gsi_last_bb (bb);
5114 !gsi_end_p (si); gsi_prev (&si))
5115 if (!is_gimple_debug (gsi_stmt (si)))
5116 vect_determine_stmt_precisions
5117 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5118 }
5119 }
5120 else
5121 {
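      /* Walk the region statements in reverse: start from the statement
	 just before REGION_END (or from the last statement in the block
	 when REGION_END is the end-of-block iterator) and stop after
	 processing the statement at REGION_BEGIN.  */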
5122 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5123 gimple_stmt_iterator si = bb_vinfo->region_end;
5124 gimple *stmt;
5125 do
5126 {
5127 if (!gsi_stmt (si))
5128 si = gsi_last_bb (bb_vinfo->bb);
5129 else
5130 gsi_prev (&si);
5131 stmt = gsi_stmt (si);
5132 stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
5133 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5134 vect_determine_stmt_precisions (vinfo, stmt_info);
5135 }
5136 while (stmt != gsi_stmt (bb_vinfo->region_begin));
5137 }
5138 }
5139
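/* A pattern recognizer: given the vec_info and a stmt_vec_info to start
   from, it returns the final statement of a replacement sequence (adding
   any earlier statements to that stmt's STMT_VINFO_PATTERN_DEF_SEQ and
   storing the final statement's vector type in *TYPE_OUT), or NULL if no
   pattern was recognized.  */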
5140 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
5141
5142 struct vect_recog_func
5143 {
5144 vect_recog_func_ptr fn;
5145 const char *name;
5146 };
5147
5148 /* Note that ordering matters - the first pattern matching on a stmt is
5149 taken, which means the more complex patterns usually need to precede the
5150 less complex ones (widen_sum only after dot_prod or sad, for example).  */
5151 static vect_recog_func vect_vect_recog_func_ptrs[] = {
5152 { vect_recog_over_widening_pattern, "over_widening" },
5153 /* Must come after over_widening, which narrows the shift as much as
5154 possible beforehand. */
5155 { vect_recog_average_pattern, "average" },
5156 { vect_recog_mulhs_pattern, "mult_high" },
5157 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
5158 { vect_recog_widen_mult_pattern, "widen_mult" },
5159 { vect_recog_dot_prod_pattern, "dot_prod" },
5160 { vect_recog_sad_pattern, "sad" },
5161 { vect_recog_widen_sum_pattern, "widen_sum" },
5162 { vect_recog_pow_pattern, "pow" },
5163 { vect_recog_widen_shift_pattern, "widen_shift" },
5164 { vect_recog_rotate_pattern, "rotate" },
5165 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
5166 { vect_recog_divmod_pattern, "divmod" },
5167 { vect_recog_mult_pattern, "mult" },
5168 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
5169 { vect_recog_bool_pattern, "bool" },
5170 /* This must come before mask conversion, and includes the parts
5171 of mask conversion that are needed for gather and scatter
5172 internal functions. */
5173 { vect_recog_gather_scatter_pattern, "gather_scatter" },
5174 { vect_recog_mask_conversion_pattern, "mask_conversion" }
5175 };
5176
5177 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
5178
5179 /* Mark statements that are involved in a pattern. */
5180
5181 static inline void
5182 vect_mark_pattern_stmts (vec_info *vinfo,
5183 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
5184 tree pattern_vectype)
5185 {
5186 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
5187 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5188
5189 gimple *orig_pattern_stmt = NULL;
5190 if (is_pattern_stmt_p (orig_stmt_info))
5191 {
5192 /* We're replacing a statement in an existing pattern definition
5193 sequence. */
5194 orig_pattern_stmt = orig_stmt_info->stmt;
5195 if (dump_enabled_p ())
5196 dump_printf_loc (MSG_NOTE, vect_location,
5197 "replacing earlier pattern %G", orig_pattern_stmt);
5198
5199 /* To keep the book-keeping simple, just swap the lhs of the
5200 old and new statements, so that the old one has a valid but
5201 unused lhs. */
5202 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
5203 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
5204 gimple_set_lhs (pattern_stmt, old_lhs);
5205
5206 if (dump_enabled_p ())
5207 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
5208
5209 /* Switch to the statement that ORIG replaces. */
5210 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
5211
5212 /* We shouldn't be replacing the main pattern statement. */
5213 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
5214 != orig_pattern_stmt);
5215 }
5216
5217 if (def_seq)
5218 for (gimple_stmt_iterator si = gsi_start (def_seq);
5219 !gsi_end_p (si); gsi_next (&si))
5220 {
5221 if (dump_enabled_p ())
5222 dump_printf_loc (MSG_NOTE, vect_location,
5223 "extra pattern stmt: %G", gsi_stmt (si));
5224 stmt_vec_info pattern_stmt_info
5225 = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
5226 orig_stmt_info, pattern_vectype);
5227 /* Stmts in the def sequence are not vectorizable cycle or
5228 induction defs; instead they should all be vect_internal_def,
5229 feeding the main pattern stmt, which retains this def type. */
5230 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
5231 }
5232
5233 if (orig_pattern_stmt)
5234 {
5235 vect_init_pattern_stmt (vinfo, pattern_stmt,
5236 orig_stmt_info, pattern_vectype);
5237
5238 /* Insert all the new pattern statements before the original one. */
5239 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5240 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
5241 orig_def_seq);
5242 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
5243 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
5244
5245 /* Remove the pattern statement that this new pattern replaces. */
5246 gsi_remove (&gsi, false);
5247 }
5248 else
5249 vect_set_pattern_stmt (vinfo,
5250 pattern_stmt, orig_stmt_info, pattern_vectype);
5251
5252 /* Transfer reduction path info to the pattern. */
5253 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
5254 {
5255 tree lookfor = gimple_op (orig_stmt_info_saved->stmt,
5256 1 + STMT_VINFO_REDUC_IDX (orig_stmt_info));
5257 /* Search the pattern def sequence and the main pattern stmt. Note
5258 we may have inserted everything into a containing pattern def
5259 sequence, so the following is a bit awkward. */
5260 gimple_stmt_iterator si;
5261 gimple *s;
5262 if (def_seq)
5263 {
5264 si = gsi_start (def_seq);
5265 s = gsi_stmt (si);
5266 gsi_next (&si);
5267 }
5268 else
5269 {
5270 si = gsi_none ();
5271 s = pattern_stmt;
5272 }
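      /* Walk S over the def sequence (if any) and then over PATTERN_STMT
	 itself, chasing LOOKFOR through the chain: whenever S uses LOOKFOR,
	 record that operand's index as S's reduction index and continue
	 the search from S's lhs.  */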
5273 do
5274 {
5275 bool found = false;
5276 for (unsigned i = 1; i < gimple_num_ops (s); ++i)
5277 if (gimple_op (s, i) == lookfor)
5278 {
5279 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i - 1;
5280 lookfor = gimple_get_lhs (s);
5281 found = true;
5282 break;
5283 }
5284 if (s == pattern_stmt)
5285 {
5286 if (!found && dump_enabled_p ())
5287 dump_printf_loc (MSG_NOTE, vect_location,
5288 "failed to update reduction index.\n");
5289 break;
5290 }
5291 if (gsi_end_p (si))
5292 s = pattern_stmt;
5293 else
5294 {
5295 s = gsi_stmt (si);
5296 if (s == pattern_stmt)
5297 /* Found the end inside a bigger pattern def seq. */
5298 si = gsi_none ();
5299 else
5300 gsi_next (&si);
5301 }
5302 } while (1);
5303 }
5304 }
5305
5306 /* Function vect_pattern_recog_1
5307
5308 Input:
5309 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
5310 computation pattern.
5311 STMT_INFO: A stmt from which the pattern search should start.
5312
5313 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
5314 a sequence of statements that has the same functionality and can be
5315 used to replace STMT_INFO. It returns the last statement in the sequence
5316 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
5317 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
5318 statement, having first checked that the target supports the new operation
5319 in that type.
5320
5321 This function also does some bookkeeping, as explained in the documentation
5322 for vect_pattern_recog. */
5323
5324 static void
5325 vect_pattern_recog_1 (vec_info *vinfo,
5326 vect_recog_func *recog_func, stmt_vec_info stmt_info)
5327 {
5328 gimple *pattern_stmt;
5329 loop_vec_info loop_vinfo;
5330 tree pattern_vectype;
5331
5332 /* If this statement has already been replaced with pattern statements,
5333 leave the original statement alone, since the first match wins.
5334 Instead try to match against the definition statements that feed
5335 the main pattern statement. */
5336 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5337 {
5338 gimple_stmt_iterator gsi;
5339 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5340 !gsi_end_p (gsi); gsi_next (&gsi))
5341 vect_pattern_recog_1 (vinfo, recog_func,
5342 vinfo->lookup_stmt (gsi_stmt (gsi)));
5343 return;
5344 }
5345
5346 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5347 pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
5348 if (!pattern_stmt)
5349 {
5350 /* Clear any half-formed pattern definition sequence. */
5351 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
5352 return;
5353 }
5354
5355 loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5356 gcc_assert (pattern_vectype);
5357
5358 /* Found a vectorizable pattern. */
5359 if (dump_enabled_p ())
5360 dump_printf_loc (MSG_NOTE, vect_location,
5361 "%s pattern recognized: %G",
5362 recog_func->name, pattern_stmt);
5363
5364 /* Mark the stmts that are involved in the pattern. */
5365 vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
5366
5367 /* Patterns cannot be vectorized using SLP, because they change the order of
5368 computation. */
5369 if (loop_vinfo)
5370 {
5371 unsigned ix, ix2;
5372 stmt_vec_info *elem_ptr;
5373 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
5374 elem_ptr, *elem_ptr == stmt_info);
5375 }
5376 }
5377
5378
5379 /* Function vect_pattern_recog
5380
5381 Input:
5382 VINFO - a vec_info for the loop or basic-block region in which we want
5383 to look for computation idioms.
5384
5385 Output - for each computation idiom that is detected we create a new stmt
5386 that provides the same functionality and that can be vectorized. We
5387 also record some information in the stmt_vec_info of the relevant
5388 stmts, as explained below:
5389
5390 At the entry to this function we have the following stmts, with the
5391 following initial values in the STMT_VINFO fields:
5392
5393 stmt in_pattern_p related_stmt vec_stmt
5394 S1: a_i = .... - - -
5395 S2: a_2 = ..use(a_i).. - - -
5396 S3: a_1 = ..use(a_2).. - - -
5397 S4: a_0 = ..use(a_1).. - - -
5398 S5: ... = ..use(a_0).. - - -
5399
5400 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
5401 represented by a single stmt. We then:
5402 - create a new stmt S6 equivalent to the pattern (the stmt is not
5403 inserted into the code)
5404 - fill in the STMT_VINFO fields as follows:
5405
5406 in_pattern_p related_stmt vec_stmt
5407 S1: a_i = .... - - -
5408 S2: a_2 = ..use(a_i).. - - -
5409 S3: a_1 = ..use(a_2).. - - -
5410 S4: a_0 = ..use(a_1).. true S6 -
5411 '---> S6: a_new = .... - S4 -
5412 S5: ... = ..use(a_0).. - - -
5413
5414 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
5415 to each other through the RELATED_STMT field).
5416
5417 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
5418 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
5419 remain irrelevant unless used by stmts other than S4.
5420
5421 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
5422 (because they are marked as irrelevant). It will vectorize S6, and record
5423 a pointer to the new vector stmt VS6 from S6 (as usual).
5424 S4 will be skipped, and S5 will be vectorized as usual:
5425
5426 in_pattern_p related_stmt vec_stmt
5427 S1: a_i = .... - - -
5428 S2: a_2 = ..use(a_i).. - - -
5429 S3: a_1 = ..use(a_2).. - - -
5430 > VS6: va_new = .... - - -
5431 S4: a_0 = ..use(a_1).. true S6 VS6
5432 '---> S6: a_new = .... - S4 VS6
5433 > VS5: ... = ..vuse(va_new).. - - -
5434 S5: ... = ..use(a_0).. - - -
5435
5436 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
5437 elsewhere), and we'll end up with:
5438
5439 VS6: va_new = ....
5440 VS5: ... = ..vuse(va_new)..
5441
5442 In case of more than one pattern statement, e.g., widen-mult with
5443 intermediate type:
5444
5445 S1 a_t = ;
5446 S2 a_T = (TYPE) a_t;
5447 '--> S3: a_it = (interm_type) a_t;
5448 S4 prod_T = a_T * CONST;
5449 '--> S5: prod_T' = a_it w* CONST;
5450
5451 there may be other users of a_T outside the pattern. In that case S2 will
5452 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
5453 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
5454 be recorded in S3. */
5455
5456 void
5457 vect_pattern_recog (vec_info *vinfo)
5458 {
5459 class loop *loop;
5460 basic_block *bbs;
5461 unsigned int nbbs;
5462 gimple_stmt_iterator si;
5463 unsigned int i, j;
5464
5465 vect_determine_precisions (vinfo);
5466
5467 DUMP_VECT_SCOPE ("vect_pattern_recog");
5468
5469 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5470 {
5471 loop = LOOP_VINFO_LOOP (loop_vinfo);
5472 bbs = LOOP_VINFO_BBS (loop_vinfo);
5473 nbbs = loop->num_nodes;
5474
5475 /* Scan through the loop stmts, applying the pattern recognition
5476 functions starting at each stmt visited: */
5477 for (i = 0; i < nbbs; i++)
5478 {
5479 basic_block bb = bbs[i];
5480 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5481 {
5482 if (is_gimple_debug (gsi_stmt (si)))
5483 continue;
5484 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
5485 /* Scan over all generic vect_recog_xxx_pattern functions. */
5486 for (j = 0; j < NUM_PATTERNS; j++)
5487 vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
5488 stmt_info);
5489 }
5490 }
5491 }
5492 else
5493 {
5494 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5495 for (si = bb_vinfo->region_begin;
5496 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
5497 {
5498 gimple *stmt = gsi_stmt (si);
5499 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
5500 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
5501 continue;
5502
5503 /* Scan over all generic vect_recog_xxx_pattern functions. */
5504 for (j = 0; j < NUM_PATTERNS; j++)
5505 vect_pattern_recog_1 (vinfo,
5506 &vect_vect_recog_func_ptrs[j], stmt_info);
5507 }
5508 }
5509
5510 /* After this no more add_stmt calls are allowed. */
5511 vinfo->stmt_vec_info_ro = true;
5512 }