]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/tree-vect-patterns.cc
Fix profile update in tree_transform_and_unroll_loop
[thirdparty/gcc.git] / gcc / tree-vect-patterns.cc
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "gimple-iterator.h"
29 #include "gimple-fold.h"
30 #include "ssa.h"
31 #include "expmed.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "tree-eh.h"
38 #include "gimplify.h"
39 #include "gimple-iterator.h"
40 #include "gimple-fold.h"
41 #include "gimplify-me.h"
42 #include "cfgloop.h"
43 #include "tree-vectorizer.h"
44 #include "dumpfile.h"
45 #include "builtins.h"
46 #include "internal-fn.h"
47 #include "case-cfn-macros.h"
48 #include "fold-const-call.h"
49 #include "attribs.h"
50 #include "cgraph.h"
51 #include "omp-simd-clone.h"
52 #include "predict.h"
53 #include "tree-vector-builder.h"
54 #include "vec-perm-indices.h"
55 #include "gimple-range.h"
56
57
58 /* TODO: Note the vectorizer still builds COND_EXPRs with GENERIC compares
59 in the first operand. Disentangling this is future work, the
60 IL is properly transfered to VEC_COND_EXPRs with separate compares. */
61
62
63 /* Return true if we have a useful VR_RANGE range for VAR, storing it
64 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
65
66 bool
67 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
68 {
69 value_range vr;
70 tree vr_min, vr_max;
71 get_range_query (cfun)->range_of_expr (vr, var);
72 if (vr.undefined_p ())
73 vr.set_varying (TREE_TYPE (var));
74 value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
75 *min_value = wi::to_wide (vr_min);
76 *max_value = wi::to_wide (vr_max);
77 wide_int nonzero = get_nonzero_bits (var);
78 signop sgn = TYPE_SIGN (TREE_TYPE (var));
79 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
80 nonzero, sgn) == VR_RANGE)
81 {
82 if (dump_enabled_p ())
83 {
84 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
85 dump_printf (MSG_NOTE, " has range [");
86 dump_hex (MSG_NOTE, *min_value);
87 dump_printf (MSG_NOTE, ", ");
88 dump_hex (MSG_NOTE, *max_value);
89 dump_printf (MSG_NOTE, "]\n");
90 }
91 return true;
92 }
93 else
94 {
95 if (dump_enabled_p ())
96 {
97 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
98 dump_printf (MSG_NOTE, " has no range info\n");
99 }
100 return false;
101 }
102 }
103
104 /* Report that we've found an instance of pattern PATTERN in
105 statement STMT. */
106
107 static void
108 vect_pattern_detected (const char *name, gimple *stmt)
109 {
110 if (dump_enabled_p ())
111 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
112 }
113
114 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
115 return the pattern statement's stmt_vec_info. Set its vector type to
116 VECTYPE if it doesn't have one already. */
117
118 static stmt_vec_info
119 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
120 stmt_vec_info orig_stmt_info, tree vectype)
121 {
122 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
123 if (pattern_stmt_info == NULL)
124 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
125 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
126
127 pattern_stmt_info->pattern_stmt_p = true;
128 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
129 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
130 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
131 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
132 {
133 gcc_assert (!vectype
134 || (VECTOR_BOOLEAN_TYPE_P (vectype)
135 == vect_use_mask_type_p (orig_stmt_info)));
136 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
137 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
138 }
139 return pattern_stmt_info;
140 }
141
142 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
143 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
144 have one already. */
145
146 static void
147 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
148 stmt_vec_info orig_stmt_info, tree vectype)
149 {
150 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
151 STMT_VINFO_RELATED_STMT (orig_stmt_info)
152 = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
153 }
154
155 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
156 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
157 be different from the vector type of the final pattern statement.
158 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
159 from which it was derived. */
160
161 static inline void
162 append_pattern_def_seq (vec_info *vinfo,
163 stmt_vec_info stmt_info, gimple *new_stmt,
164 tree vectype = NULL_TREE,
165 tree scalar_type_for_mask = NULL_TREE)
166 {
167 gcc_assert (!scalar_type_for_mask
168 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
169 if (vectype)
170 {
171 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
172 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
173 if (scalar_type_for_mask)
174 new_stmt_info->mask_precision
175 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
176 }
177 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
178 new_stmt);
179 }
180
181 /* The caller wants to perform new operations on vect_external variable
182 VAR, so that the result of the operations would also be vect_external.
183 Return the edge on which the operations can be performed, if one exists.
184 Return null if the operations should instead be treated as part of
185 the pattern that needs them. */
186
187 static edge
188 vect_get_external_def_edge (vec_info *vinfo, tree var)
189 {
190 edge e = NULL;
191 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
192 {
193 e = loop_preheader_edge (loop_vinfo->loop);
194 if (!SSA_NAME_IS_DEFAULT_DEF (var))
195 {
196 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
197 if (bb == NULL
198 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
199 e = NULL;
200 }
201 }
202 return e;
203 }
204
205 /* Return true if the target supports a vector version of CODE,
206 where CODE is known to map to a direct optab with the given SUBTYPE.
207 ITYPE specifies the type of (some of) the scalar inputs and OTYPE
208 specifies the type of the scalar result.
209
210 If CODE allows the inputs and outputs to have different type
211 (such as for WIDEN_SUM_EXPR), it is the input mode rather
212 than the output mode that determines the appropriate target pattern.
213 Operand 0 of the target pattern then specifies the mode that the output
214 must have.
215
216 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
217 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
218 is nonnull. */
219
220 static bool
221 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
222 tree itype, tree *vecotype_out,
223 tree *vecitype_out = NULL,
224 enum optab_subtype subtype = optab_default)
225 {
226 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
227 if (!vecitype)
228 return false;
229
230 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
231 if (!vecotype)
232 return false;
233
234 optab optab = optab_for_tree_code (code, vecitype, subtype);
235 if (!optab)
236 return false;
237
238 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
239 if (icode == CODE_FOR_nothing
240 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
241 return false;
242
243 *vecotype_out = vecotype;
244 if (vecitype_out)
245 *vecitype_out = vecitype;
246 return true;
247 }
248
249 /* Round bit precision PRECISION up to a full element. */
250
251 static unsigned int
252 vect_element_precision (unsigned int precision)
253 {
254 precision = 1 << ceil_log2 (precision);
255 return MAX (precision, BITS_PER_UNIT);
256 }
257
258 /* If OP is defined by a statement that's being considered for vectorization,
259 return information about that statement, otherwise return NULL. */
260
261 static stmt_vec_info
262 vect_get_internal_def (vec_info *vinfo, tree op)
263 {
264 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
265 if (def_stmt_info
266 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
267 return def_stmt_info;
268 return NULL;
269 }
270
271 /* Check whether NAME, an ssa-name used in STMT_VINFO,
272 is a result of a type promotion, such that:
273 DEF_STMT: NAME = NOP (name0)
274 If CHECK_SIGN is TRUE, check that either both types are signed or both are
275 unsigned. */
276
277 static bool
278 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
279 tree *orig_type, gimple **def_stmt, bool *promotion)
280 {
281 tree type = TREE_TYPE (name);
282 tree oprnd0;
283 enum vect_def_type dt;
284
285 stmt_vec_info def_stmt_info;
286 if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
287 return false;
288
289 if (dt != vect_internal_def
290 && dt != vect_external_def && dt != vect_constant_def)
291 return false;
292
293 if (!*def_stmt)
294 return false;
295
296 if (!is_gimple_assign (*def_stmt))
297 return false;
298
299 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
300 return false;
301
302 oprnd0 = gimple_assign_rhs1 (*def_stmt);
303
304 *orig_type = TREE_TYPE (oprnd0);
305 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
306 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
307 return false;
308
309 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
310 *promotion = true;
311 else
312 *promotion = false;
313
314 if (!vect_is_simple_use (oprnd0, vinfo, &dt))
315 return false;
316
317 return true;
318 }
319
320 /* Holds information about an input operand after some sign changes
321 and type promotions have been peeled away. */
322 class vect_unpromoted_value {
323 public:
324 vect_unpromoted_value ();
325
326 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
327
328 /* The value obtained after peeling away zero or more casts. */
329 tree op;
330
331 /* The type of OP. */
332 tree type;
333
334 /* The definition type of OP. */
335 vect_def_type dt;
336
337 /* If OP is the result of peeling at least one cast, and if the cast
338 of OP itself is a vectorizable statement, CASTER identifies that
339 statement, otherwise it is null. */
340 stmt_vec_info caster;
341 };
342
343 inline vect_unpromoted_value::vect_unpromoted_value ()
344 : op (NULL_TREE),
345 type (NULL_TREE),
346 dt (vect_uninitialized_def),
347 caster (NULL)
348 {
349 }
350
351 /* Set the operand to OP_IN, its definition type to DT_IN, and the
352 statement that casts it to CASTER_IN. */
353
354 inline void
355 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
356 stmt_vec_info caster_in)
357 {
358 op = op_in;
359 type = TREE_TYPE (op);
360 dt = dt_in;
361 caster = caster_in;
362 }
363
364 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
365 to reach some vectorizable inner operand OP', continuing as long as it
366 is possible to convert OP' back to OP using a possible sign change
367 followed by a possible promotion P. Return this OP', or null if OP is
368 not a vectorizable SSA name. If there is a promotion P, describe its
369 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
370 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
371 have more than one user.
372
373 A successful return means that it is possible to go from OP' to OP
374 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
375 whereas the cast from UNPROM to OP might be a promotion, a sign
376 change, or a nop.
377
378 E.g. say we have:
379
380 signed short *ptr = ...;
381 signed short C = *ptr;
382 unsigned short B = (unsigned short) C; // sign change
383 signed int A = (signed int) B; // unsigned promotion
384 ...possible other uses of A...
385 unsigned int OP = (unsigned int) A; // sign change
386
387 In this case it's possible to go directly from C to OP using:
388
389 OP = (unsigned int) (unsigned short) C;
390 +------------+ +--------------+
391 promotion sign change
392
393 so OP' would be C. The input to the promotion is B, so UNPROM
394 would describe B. */
395
396 static tree
397 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
398 vect_unpromoted_value *unprom,
399 bool *single_use_p = NULL)
400 {
401 tree op_type = TREE_TYPE (op);
402 if (!INTEGRAL_TYPE_P (op_type))
403 return NULL_TREE;
404
405 tree res = NULL_TREE;
406 unsigned int orig_precision = TYPE_PRECISION (op_type);
407 unsigned int min_precision = orig_precision;
408 stmt_vec_info caster = NULL;
409 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
410 {
411 /* See whether OP is simple enough to vectorize. */
412 stmt_vec_info def_stmt_info;
413 gimple *def_stmt;
414 vect_def_type dt;
415 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
416 break;
417
418 /* If OP is the input of a demotion, skip over it to see whether
419 OP is itself the result of a promotion. If so, the combined
420 effect of the promotion and the demotion might fit the required
421 pattern, otherwise neither operation fits.
422
423 This copes with cases such as the result of an arithmetic
424 operation being truncated before being stored, and where that
425 arithmetic operation has been recognized as an over-widened one. */
426 if (TYPE_PRECISION (op_type) <= min_precision)
427 {
428 /* Use OP as the UNPROM described above if we haven't yet
429 found a promotion, or if using the new input preserves the
430 sign of the previous promotion. */
431 if (!res
432 || TYPE_PRECISION (unprom->type) == orig_precision
433 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
434 {
435 unprom->set_op (op, dt, caster);
436 min_precision = TYPE_PRECISION (op_type);
437 }
438 /* Stop if we've already seen a promotion and if this
439 conversion does more than change the sign. */
440 else if (TYPE_PRECISION (op_type)
441 != TYPE_PRECISION (unprom->type))
442 break;
443
444 /* The sequence now extends to OP. */
445 res = op;
446 }
447
448 /* See whether OP is defined by a cast. Record it as CASTER if
449 the cast is potentially vectorizable. */
450 if (!def_stmt)
451 break;
452 caster = def_stmt_info;
453
454 /* Ignore pattern statements, since we don't link uses for them. */
455 if (caster
456 && single_use_p
457 && !STMT_VINFO_RELATED_STMT (caster)
458 && !has_single_use (res))
459 *single_use_p = false;
460
461 gassign *assign = dyn_cast <gassign *> (def_stmt);
462 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
463 break;
464
465 /* Continue with the input to the cast. */
466 op = gimple_assign_rhs1 (def_stmt);
467 op_type = TREE_TYPE (op);
468 }
469 return res;
470 }
471
472 /* OP is an integer operand to an operation that returns TYPE, and we
473 want to treat the operation as a widening one. So far we can treat
474 it as widening from *COMMON_TYPE.
475
476 Return true if OP is suitable for such a widening operation,
477 either widening from *COMMON_TYPE or from some supertype of it.
478 Update *COMMON_TYPE to the supertype in the latter case.
479
480 SHIFT_P is true if OP is a shift amount. */
481
482 static bool
483 vect_joust_widened_integer (tree type, bool shift_p, tree op,
484 tree *common_type)
485 {
486 /* Calculate the minimum precision required by OP, without changing
487 the sign of either operand. */
488 unsigned int precision;
489 if (shift_p)
490 {
491 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
492 return false;
493 precision = TREE_INT_CST_LOW (op);
494 }
495 else
496 {
497 precision = wi::min_precision (wi::to_widest (op),
498 TYPE_SIGN (*common_type));
499 if (precision * 2 > TYPE_PRECISION (type))
500 return false;
501 }
502
503 /* If OP requires a wider type, switch to that type. The checks
504 above ensure that this is still narrower than the result. */
505 precision = vect_element_precision (precision);
506 if (TYPE_PRECISION (*common_type) < precision)
507 *common_type = build_nonstandard_integer_type
508 (precision, TYPE_UNSIGNED (*common_type));
509 return true;
510 }
511
512 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
513 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
514
515 static bool
516 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
517 {
518 if (types_compatible_p (*common_type, new_type))
519 return true;
520
521 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
522 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
523 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
524 return true;
525
526 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
527 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
528 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
529 {
530 *common_type = new_type;
531 return true;
532 }
533
534 /* We have mismatched signs, with the signed type being
535 no wider than the unsigned type. In this case we need
536 a wider signed type. */
537 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
538 TYPE_PRECISION (new_type));
539 precision *= 2;
540
541 if (precision * 2 > TYPE_PRECISION (type))
542 return false;
543
544 *common_type = build_nonstandard_integer_type (precision, false);
545 return true;
546 }
547
548 /* Check whether STMT_INFO can be viewed as a tree of integer operations
549 in which each node either performs CODE or WIDENED_CODE, and where
550 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
551 specifies the maximum number of leaf operands. SHIFT_P says whether
552 CODE and WIDENED_CODE are some sort of shift.
553
554 If STMT_INFO is such a tree, return the number of leaf operands
555 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
556 to a type that (a) is narrower than the result of STMT_INFO and
557 (b) can hold all leaf operand values.
558
559 If SUBTYPE then allow that the signs of the operands
560 may differ in signs but not in precision. SUBTYPE is updated to reflect
561 this.
562
563 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
564 exists. */
565
566 static unsigned int
567 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
568 code_helper widened_code, bool shift_p,
569 unsigned int max_nops,
570 vect_unpromoted_value *unprom, tree *common_type,
571 enum optab_subtype *subtype = NULL)
572 {
573 /* Check for an integer operation with the right code. */
574 gimple* stmt = stmt_info->stmt;
575 if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
576 return 0;
577
578 code_helper rhs_code;
579 if (is_gimple_assign (stmt))
580 rhs_code = gimple_assign_rhs_code (stmt);
581 else if (is_gimple_call (stmt))
582 rhs_code = gimple_call_combined_fn (stmt);
583 else
584 return 0;
585
586 if (rhs_code != code
587 && rhs_code != widened_code)
588 return 0;
589
590 tree lhs = gimple_get_lhs (stmt);
591 tree type = TREE_TYPE (lhs);
592 if (!INTEGRAL_TYPE_P (type))
593 return 0;
594
595 /* Assume that both operands will be leaf operands. */
596 max_nops -= 2;
597
598 /* Check the operands. */
599 unsigned int next_op = 0;
600 for (unsigned int i = 0; i < 2; ++i)
601 {
602 vect_unpromoted_value *this_unprom = &unprom[next_op];
603 unsigned int nops = 1;
604 tree op = gimple_arg (stmt, i);
605 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
606 {
607 /* We already have a common type from earlier operands.
608 Update it to account for OP. */
609 this_unprom->set_op (op, vect_constant_def);
610 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
611 return 0;
612 }
613 else
614 {
615 /* Only allow shifts by constants. */
616 if (shift_p && i == 1)
617 return 0;
618
619 if (rhs_code != code)
620 {
621 /* If rhs_code is widened_code, don't look through further
622 possible promotions, there is a promotion already embedded
623 in the WIDEN_*_EXPR. */
624 if (TREE_CODE (op) != SSA_NAME
625 || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
626 return 0;
627
628 stmt_vec_info def_stmt_info;
629 gimple *def_stmt;
630 vect_def_type dt;
631 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
632 &def_stmt))
633 return 0;
634 this_unprom->set_op (op, dt, NULL);
635 }
636 else if (!vect_look_through_possible_promotion (vinfo, op,
637 this_unprom))
638 return 0;
639
640 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
641 {
642 /* The operand isn't widened. If STMT_INFO has the code
643 for an unwidened operation, recursively check whether
644 this operand is a node of the tree. */
645 if (rhs_code != code
646 || max_nops == 0
647 || this_unprom->dt != vect_internal_def)
648 return 0;
649
650 /* Give back the leaf slot allocated above now that we're
651 not treating this as a leaf operand. */
652 max_nops += 1;
653
654 /* Recursively process the definition of the operand. */
655 stmt_vec_info def_stmt_info
656 = vinfo->lookup_def (this_unprom->op);
657 nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
658 widened_code, shift_p, max_nops,
659 this_unprom, common_type,
660 subtype);
661 if (nops == 0)
662 return 0;
663
664 max_nops -= nops;
665 }
666 else
667 {
668 /* Make sure that the operand is narrower than the result. */
669 if (TYPE_PRECISION (this_unprom->type) * 2
670 > TYPE_PRECISION (type))
671 return 0;
672
673 /* Update COMMON_TYPE for the new operand. */
674 if (i == 0)
675 *common_type = this_unprom->type;
676 else if (!vect_joust_widened_type (type, this_unprom->type,
677 common_type))
678 {
679 if (subtype)
680 {
681 /* See if we can sign extend the smaller type. */
682 if (TYPE_PRECISION (this_unprom->type)
683 > TYPE_PRECISION (*common_type))
684 *common_type = this_unprom->type;
685 *subtype = optab_vector_mixed_sign;
686 }
687 else
688 return 0;
689 }
690 }
691 }
692 next_op += nops;
693 }
694 return next_op;
695 }
696
697 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
698 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
699
700 static tree
701 vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
702 {
703 return make_temp_ssa_name (type, stmt, "patt");
704 }
705
706 /* STMT2_INFO describes a type conversion that could be split into STMT1
707 followed by a version of STMT2_INFO that takes NEW_RHS as its first
708 input. Try to do this using pattern statements, returning true on
709 success. */
710
711 static bool
712 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
713 gimple *stmt1, tree vectype)
714 {
715 if (is_pattern_stmt_p (stmt2_info))
716 {
717 /* STMT2_INFO is part of a pattern. Get the statement to which
718 the pattern is attached. */
719 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
720 vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
721
722 if (dump_enabled_p ())
723 dump_printf_loc (MSG_NOTE, vect_location,
724 "Splitting pattern statement: %G", stmt2_info->stmt);
725
726 /* Since STMT2_INFO is a pattern statement, we can change it
727 in-situ without worrying about changing the code for the
728 containing block. */
729 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
730
731 if (dump_enabled_p ())
732 {
733 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
734 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
735 stmt2_info->stmt);
736 }
737
738 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
739 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
740 /* STMT2_INFO is the actual pattern statement. Add STMT1
741 to the end of the definition sequence. */
742 gimple_seq_add_stmt_without_update (def_seq, stmt1);
743 else
744 {
745 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
746 before it. */
747 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
748 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
749 }
750 return true;
751 }
752 else
753 {
754 /* STMT2_INFO doesn't yet have a pattern. Try to create a
755 two-statement pattern now. */
756 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
757 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
758 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
759 if (!lhs_vectype)
760 return false;
761
762 if (dump_enabled_p ())
763 dump_printf_loc (MSG_NOTE, vect_location,
764 "Splitting statement: %G", stmt2_info->stmt);
765
766 /* Add STMT1 as a singleton pattern definition sequence. */
767 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
768 vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
769 gimple_seq_add_stmt_without_update (def_seq, stmt1);
770
771 /* Build the second of the two pattern statements. */
772 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
773 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
774 vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
775
776 if (dump_enabled_p ())
777 {
778 dump_printf_loc (MSG_NOTE, vect_location,
779 "into pattern statements: %G", stmt1);
780 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
781 (gimple *) new_stmt2);
782 }
783
784 return true;
785 }
786 }
787
788 /* Look for the following pattern
789 X = x[i]
790 Y = y[i]
791 DIFF = X - Y
792 DAD = ABS_EXPR<DIFF>
793
794 ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
795 HALF_TYPE and UNPROM will be set should the statement be found to
796 be a widened operation.
797 DIFF_STMT will be set to the MINUS_EXPR
798 statement that precedes the ABS_STMT unless vect_widened_op_tree
799 succeeds.
800 */
801 static bool
802 vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
803 tree *half_type,
804 vect_unpromoted_value unprom[2],
805 gassign **diff_stmt)
806 {
807 if (!abs_stmt)
808 return false;
809
810 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
811 inside the loop (in case we are analyzing an outer-loop). */
812 enum tree_code code = gimple_assign_rhs_code (abs_stmt);
813 if (code != ABS_EXPR && code != ABSU_EXPR)
814 return false;
815
816 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
817 tree abs_type = TREE_TYPE (abs_oprnd);
818 if (!abs_oprnd)
819 return false;
820 if (!ANY_INTEGRAL_TYPE_P (abs_type)
821 || TYPE_OVERFLOW_WRAPS (abs_type)
822 || TYPE_UNSIGNED (abs_type))
823 return false;
824
825 /* Peel off conversions from the ABS input. This can involve sign
826 changes (e.g. from an unsigned subtraction to a signed ABS input)
827 or signed promotion, but it can't include unsigned promotion.
828 (Note that ABS of an unsigned promotion should have been folded
829 away before now anyway.) */
830 vect_unpromoted_value unprom_diff;
831 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
832 &unprom_diff);
833 if (!abs_oprnd)
834 return false;
835 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
836 && TYPE_UNSIGNED (unprom_diff.type))
837 return false;
838
839 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
840 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
841 if (!diff_stmt_vinfo)
842 return false;
843
844 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
845 inside the loop (in case we are analyzing an outer-loop). */
846 if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
847 MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
848 false, 2, unprom, half_type))
849 return true;
850
851 /* Failed to find a widen operation so we check for a regular MINUS_EXPR. */
852 gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
853 if (diff_stmt && diff
854 && gimple_assign_rhs_code (diff) == MINUS_EXPR
855 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
856 {
857 *diff_stmt = diff;
858 *half_type = NULL_TREE;
859 return true;
860 }
861
862 return false;
863 }
864
865 /* Convert UNPROM to TYPE and return the result, adding new statements
866 to STMT_INFO's pattern definition statements if no better way is
867 available. VECTYPE is the vector form of TYPE.
868
869 If SUBTYPE then convert the type based on the subtype. */
870
871 static tree
872 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
873 vect_unpromoted_value *unprom, tree vectype,
874 enum optab_subtype subtype = optab_default)
875 {
876 /* Update the type if the signs differ. */
877 if (subtype == optab_vector_mixed_sign)
878 {
879 gcc_assert (!TYPE_UNSIGNED (type));
880 if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
881 {
882 type = unsigned_type_for (type);
883 vectype = unsigned_type_for (vectype);
884 }
885 }
886
887 /* Check for a no-op conversion. */
888 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
889 return unprom->op;
890
891 /* Allow the caller to create constant vect_unpromoted_values. */
892 if (TREE_CODE (unprom->op) == INTEGER_CST)
893 return wide_int_to_tree (type, wi::to_widest (unprom->op));
894
895 tree input = unprom->op;
896 if (unprom->caster)
897 {
898 tree lhs = gimple_get_lhs (unprom->caster->stmt);
899 tree lhs_type = TREE_TYPE (lhs);
900
901 /* If the result of the existing cast is the right width, use it
902 instead of the source of the cast. */
903 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
904 input = lhs;
905 /* If the precision we want is between the source and result
906 precisions of the existing cast, try splitting the cast into
907 two and tapping into a mid-way point. */
908 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
909 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
910 {
911 /* In order to preserve the semantics of the original cast,
912 give the mid-way point the same signedness as the input value.
913
914 It would be possible to use a signed type here instead if
915 TYPE is signed and UNPROM->TYPE is unsigned, but that would
916 make the sign of the midtype sensitive to the order in
917 which we process the statements, since the signedness of
918 TYPE is the signedness required by just one of possibly
919 many users. Also, unsigned promotions are usually as cheap
920 as or cheaper than signed ones, so it's better to keep an
921 unsigned promotion. */
922 tree midtype = build_nonstandard_integer_type
923 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
924 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
925 if (vec_midtype)
926 {
927 input = vect_recog_temp_ssa_var (midtype, NULL);
928 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
929 unprom->op);
930 if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
931 vec_midtype))
932 append_pattern_def_seq (vinfo, stmt_info,
933 new_stmt, vec_midtype);
934 }
935 }
936
937 /* See if we can reuse an existing result. */
938 if (types_compatible_p (type, TREE_TYPE (input)))
939 return input;
940 }
941
942 /* We need a new conversion statement. */
943 tree new_op = vect_recog_temp_ssa_var (type, NULL);
944 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
945
946 /* If OP is an external value, see if we can insert the new statement
947 on an incoming edge. */
948 if (input == unprom->op && unprom->dt == vect_external_def)
949 if (edge e = vect_get_external_def_edge (vinfo, input))
950 {
951 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
952 gcc_assert (!new_bb);
953 return new_op;
954 }
955
956 /* As a (common) last resort, add the statement to the pattern itself. */
957 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
958 return new_op;
959 }
960
961 /* Invoke vect_convert_input for N elements of UNPROM and store the
962 result in the corresponding elements of RESULT.
963
964 If SUBTYPE then convert the type based on the subtype. */
965
966 static void
967 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
968 tree *result, tree type, vect_unpromoted_value *unprom,
969 tree vectype, enum optab_subtype subtype = optab_default)
970 {
971 for (unsigned int i = 0; i < n; ++i)
972 {
973 unsigned int j;
974 for (j = 0; j < i; ++j)
975 if (unprom[j].op == unprom[i].op)
976 break;
977
978 if (j < i)
979 result[i] = result[j];
980 else
981 result[i] = vect_convert_input (vinfo, stmt_info,
982 type, &unprom[i], vectype, subtype);
983 }
984 }
985
986 /* The caller has created a (possibly empty) sequence of pattern definition
987 statements followed by a single statement PATTERN_STMT. Cast the result
988 of this final statement to TYPE. If a new statement is needed, add
989 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
990 and return the new statement, otherwise return PATTERN_STMT as-is.
991 VECITYPE is the vector form of PATTERN_STMT's result type. */
992
993 static gimple *
994 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
995 gimple *pattern_stmt, tree vecitype)
996 {
997 tree lhs = gimple_get_lhs (pattern_stmt);
998 if (!types_compatible_p (type, TREE_TYPE (lhs)))
999 {
1000 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
1001 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
1002 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
1003 }
1004 return pattern_stmt;
1005 }
1006
1007 /* Return true if STMT_VINFO describes a reduction for which reassociation
1008 is allowed. If STMT_INFO is part of a group, assume that it's part of
1009 a reduction chain and optimistically assume that all statements
1010 except the last allow reassociation.
1011 Also require it to have code CODE and to be a reduction
1012 in the outermost loop. When returning true, store the operands in
1013 *OP0_OUT and *OP1_OUT. */
1014
1015 static bool
1016 vect_reassociating_reduction_p (vec_info *vinfo,
1017 stmt_vec_info stmt_info, tree_code code,
1018 tree *op0_out, tree *op1_out)
1019 {
1020 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
1021 if (!loop_info)
1022 return false;
1023
1024 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
1025 if (!assign || gimple_assign_rhs_code (assign) != code)
1026 return false;
1027
1028 /* We don't allow changing the order of the computation in the inner-loop
1029 when doing outer-loop vectorization. */
1030 class loop *loop = LOOP_VINFO_LOOP (loop_info);
1031 if (loop && nested_in_vect_loop_p (loop, stmt_info))
1032 return false;
1033
1034 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
1035 {
1036 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
1037 code))
1038 return false;
1039 }
1040 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
1041 return false;
1042
1043 *op0_out = gimple_assign_rhs1 (assign);
1044 *op1_out = gimple_assign_rhs2 (assign);
1045 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
1046 std::swap (*op0_out, *op1_out);
1047 return true;
1048 }
1049
1050 /* match.pd function to match
1051 (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
1052 with conditions:
1053 1) @1, @2, c, d, a, b are all integral type.
1054 2) There's single_use for both @1 and @2.
1055 3) a, c have same precision.
1056 4) c and @1 have different precision.
1057 5) c, d are the same type or they can differ in sign when convert is
1058 truncation.
1059
1060 record a and c and d and @3. */
1061
1062 extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
1063
1064 /* Function vect_recog_cond_expr_convert
1065
1066 Try to find the following pattern:
1067
1068 TYPE_AB A,B;
1069 TYPE_CD C,D;
1070 TYPE_E E;
1071 TYPE_E op_true = (TYPE_E) A;
1072 TYPE_E op_false = (TYPE_E) B;
1073
1074 E = C cmp D ? op_true : op_false;
1075
1076 where
1077 TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
1078 TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
1079 single_use of op_true and op_false.
1080 TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
1081
1082 Input:
1083
1084 * STMT_VINFO: The stmt from which the pattern search begins.
1085 here it starts with E = c cmp D ? op_true : op_false;
1086
1087 Output:
1088
1089 TYPE1 E' = C cmp D ? A : B;
1090 TYPE3 E = (TYPE3) E';
1091
1092 There may extra nop_convert for A or B to handle different signness.
1093
1094 * TYPE_OUT: The vector type of the output of this pattern.
1095
1096 * Return value: A new stmt that will be used to replace the sequence of
1097 stmts that constitute the pattern. In this case it will be:
1098 E = (TYPE3)E';
1099 E' = C cmp D ? A : B; is recorded in pattern definition statements; */
1100
1101 static gimple *
1102 vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
1103 stmt_vec_info stmt_vinfo, tree *type_out)
1104 {
1105 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1106 tree lhs, match[4], temp, type, new_lhs, op2;
1107 gimple *cond_stmt;
1108 gimple *pattern_stmt;
1109
1110 if (!last_stmt)
1111 return NULL;
1112
1113 lhs = gimple_assign_lhs (last_stmt);
1114
1115 /* Find E = C cmp D ? (TYPE3) A ? (TYPE3) B;
1116 TYPE_PRECISION (A) == TYPE_PRECISION (C). */
1117 if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
1118 return NULL;
1119
1120 vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
1121
1122 op2 = match[2];
1123 type = TREE_TYPE (match[1]);
1124 if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
1125 {
1126 op2 = vect_recog_temp_ssa_var (type, NULL);
1127 gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
1128 append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
1129 get_vectype_for_scalar_type (vinfo, type));
1130 }
1131
1132 temp = vect_recog_temp_ssa_var (type, NULL);
1133 cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
1134 match[1], op2));
1135 append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
1136 get_vectype_for_scalar_type (vinfo, type));
1137 new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
1138 pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
1139 *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
1140
1141 if (dump_enabled_p ())
1142 dump_printf_loc (MSG_NOTE, vect_location,
1143 "created pattern stmt: %G", pattern_stmt);
1144 return pattern_stmt;
1145 }
1146
1147 /* Function vect_recog_dot_prod_pattern
1148
1149 Try to find the following pattern:
1150
1151 type1a x_t
1152 type1b y_t;
1153 TYPE1 prod;
1154 TYPE2 sum = init;
1155 loop:
1156 sum_0 = phi <init, sum_1>
1157 S1 x_t = ...
1158 S2 y_t = ...
1159 S3 x_T = (TYPE1) x_t;
1160 S4 y_T = (TYPE1) y_t;
1161 S5 prod = x_T * y_T;
1162 [S6 prod = (TYPE2) prod; #optional]
1163 S7 sum_1 = prod + sum_0;
1164
1165 where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
1166 the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
1167 'type1a' and 'type1b' can differ.
1168
1169 Input:
1170
1171 * STMT_VINFO: The stmt from which the pattern search begins. In the
1172 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1173 will be detected.
1174
1175 Output:
1176
1177 * TYPE_OUT: The type of the output of this pattern.
1178
1179 * Return value: A new stmt that will be used to replace the sequence of
1180 stmts that constitute the pattern. In this case it will be:
1181 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1182
1183 Note: The dot-prod idiom is a widening reduction pattern that is
1184 vectorized without preserving all the intermediate results. It
1185 produces only N/2 (widened) results (by summing up pairs of
1186 intermediate results) rather than all N results. Therefore, we
1187 cannot allow this pattern when we want to get all the results and in
1188 the correct order (as is the case when this computation is in an
1189 inner-loop nested in an outer-loop that us being vectorized). */
1190
1191 static gimple *
1192 vect_recog_dot_prod_pattern (vec_info *vinfo,
1193 stmt_vec_info stmt_vinfo, tree *type_out)
1194 {
1195 tree oprnd0, oprnd1;
1196 gimple *last_stmt = stmt_vinfo->stmt;
1197 tree type, half_type;
1198 gimple *pattern_stmt;
1199 tree var;
1200
1201 /* Look for the following pattern
1202 DX = (TYPE1) X;
1203 DY = (TYPE1) Y;
1204 DPROD = DX * DY;
1205 DDPROD = (TYPE2) DPROD;
1206 sum_1 = DDPROD + sum_0;
1207 In which
1208 - DX is double the size of X
1209 - DY is double the size of Y
1210 - DX, DY, DPROD all have the same type but the sign
1211 between X, Y and DPROD can differ.
1212 - sum is the same size of DPROD or bigger
1213 - sum has been recognized as a reduction variable.
1214
1215 This is equivalent to:
1216 DPROD = X w* Y; #widen mult
1217 sum_1 = DPROD w+ sum_0; #widen summation
1218 or
1219 DPROD = X w* Y; #widen mult
1220 sum_1 = DPROD + sum_0; #summation
1221 */
1222
1223 /* Starting from LAST_STMT, follow the defs of its uses in search
1224 of the above pattern. */
1225
1226 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1227 &oprnd0, &oprnd1))
1228 return NULL;
1229
1230 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1231
1232 vect_unpromoted_value unprom_mult;
1233 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
1234
1235 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1236 we know that oprnd1 is the reduction variable (defined by a loop-header
1237 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1238 Left to check that oprnd0 is defined by a (widen_)mult_expr */
1239 if (!oprnd0)
1240 return NULL;
1241
1242 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
1243 if (!mult_vinfo)
1244 return NULL;
1245
1246 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1247 inside the loop (in case we are analyzing an outer-loop). */
1248 vect_unpromoted_value unprom0[2];
1249 enum optab_subtype subtype = optab_vector;
1250 if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
1251 false, 2, unprom0, &half_type, &subtype))
1252 return NULL;
1253
1254 /* If there are two widening operations, make sure they agree on the sign
1255 of the extension. The result of an optab_vector_mixed_sign operation
1256 is signed; otherwise, the result has the same sign as the operands. */
1257 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1258 && (subtype == optab_vector_mixed_sign
1259 ? TYPE_UNSIGNED (unprom_mult.type)
1260 : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1261 return NULL;
1262
1263 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1264
1265 /* If the inputs have mixed signs, canonicalize on using the signed
1266 input type for analysis. This also helps when emulating mixed-sign
1267 operations using signed operations. */
1268 if (subtype == optab_vector_mixed_sign)
1269 half_type = signed_type_for (half_type);
1270
1271 tree half_vectype;
1272 if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1273 type_out, &half_vectype, subtype))
1274 {
1275 /* We can emulate a mixed-sign dot-product using a sequence of
1276 signed dot-products; see vect_emulate_mixed_dot_prod for details. */
1277 if (subtype != optab_vector_mixed_sign
1278 || !vect_supportable_direct_optab_p (vinfo, signed_type_for (type),
1279 DOT_PROD_EXPR, half_type,
1280 type_out, &half_vectype,
1281 optab_vector))
1282 return NULL;
1283
1284 *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
1285 *type_out);
1286 }
1287
1288 /* Get the inputs in the appropriate types. */
1289 tree mult_oprnd[2];
1290 vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1291 unprom0, half_vectype, subtype);
1292
1293 var = vect_recog_temp_ssa_var (type, NULL);
1294 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1295 mult_oprnd[0], mult_oprnd[1], oprnd1);
1296
1297 return pattern_stmt;
1298 }
1299
1300
1301 /* Function vect_recog_sad_pattern
1302
1303 Try to find the following Sum of Absolute Difference (SAD) pattern:
1304
1305 type x_t, y_t;
1306 signed TYPE1 diff, abs_diff;
1307 TYPE2 sum = init;
1308 loop:
1309 sum_0 = phi <init, sum_1>
1310 S1 x_t = ...
1311 S2 y_t = ...
1312 S3 x_T = (TYPE1) x_t;
1313 S4 y_T = (TYPE1) y_t;
1314 S5 diff = x_T - y_T;
1315 S6 abs_diff = ABS_EXPR <diff>;
1316 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1317 S8 sum_1 = abs_diff + sum_0;
1318
1319 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1320 same size of 'TYPE1' or bigger. This is a special case of a reduction
1321 computation.
1322
1323 Input:
1324
1325 * STMT_VINFO: The stmt from which the pattern search begins. In the
1326 example, when this function is called with S8, the pattern
1327 {S3,S4,S5,S6,S7,S8} will be detected.
1328
1329 Output:
1330
1331 * TYPE_OUT: The type of the output of this pattern.
1332
1333 * Return value: A new stmt that will be used to replace the sequence of
1334 stmts that constitute the pattern. In this case it will be:
1335 SAD_EXPR <x_t, y_t, sum_0>
1336 */
1337
1338 static gimple *
1339 vect_recog_sad_pattern (vec_info *vinfo,
1340 stmt_vec_info stmt_vinfo, tree *type_out)
1341 {
1342 gimple *last_stmt = stmt_vinfo->stmt;
1343 tree half_type;
1344
1345 /* Look for the following pattern
1346 DX = (TYPE1) X;
1347 DY = (TYPE1) Y;
1348 DDIFF = DX - DY;
1349 DAD = ABS_EXPR <DDIFF>;
1350 DDPROD = (TYPE2) DPROD;
1351 sum_1 = DAD + sum_0;
1352 In which
1353 - DX is at least double the size of X
1354 - DY is at least double the size of Y
1355 - DX, DY, DDIFF, DAD all have the same type
1356 - sum is the same size of DAD or bigger
1357 - sum has been recognized as a reduction variable.
1358
1359 This is equivalent to:
1360 DDIFF = X w- Y; #widen sub
1361 DAD = ABS_EXPR <DDIFF>;
1362 sum_1 = DAD w+ sum_0; #widen summation
1363 or
1364 DDIFF = X w- Y; #widen sub
1365 DAD = ABS_EXPR <DDIFF>;
1366 sum_1 = DAD + sum_0; #summation
1367 */
1368
1369 /* Starting from LAST_STMT, follow the defs of its uses in search
1370 of the above pattern. */
1371
1372 tree plus_oprnd0, plus_oprnd1;
1373 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1374 &plus_oprnd0, &plus_oprnd1))
1375 return NULL;
1376
1377 tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1378
1379 /* Any non-truncating sequence of conversions is OK here, since
1380 with a successful match, the result of the ABS(U) is known to fit
1381 within the nonnegative range of the result type. (It cannot be the
1382 negative of the minimum signed value due to the range of the widening
1383 MINUS_EXPR.) */
1384 vect_unpromoted_value unprom_abs;
1385 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1386 &unprom_abs);
1387
1388 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1389 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1390 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1391 Then check that plus_oprnd0 is defined by an abs_expr. */
1392
1393 if (!plus_oprnd0)
1394 return NULL;
1395
1396 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1397 if (!abs_stmt_vinfo)
1398 return NULL;
1399
1400 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1401 inside the loop (in case we are analyzing an outer-loop). */
1402 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1403 vect_unpromoted_value unprom[2];
1404
1405 if (!abs_stmt)
1406 {
1407 gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
1408 if (!abd_stmt
1409 || !gimple_call_internal_p (abd_stmt)
1410 || gimple_call_num_args (abd_stmt) != 2)
1411 return NULL;
1412
1413 tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
1414 tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
1415
1416 if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
1417 {
1418 if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
1419 &unprom[0])
1420 || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
1421 &unprom[1]))
1422 return NULL;
1423 }
1424 else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
1425 {
1426 unprom[0].op = abd_oprnd0;
1427 unprom[0].type = TREE_TYPE (abd_oprnd0);
1428 unprom[1].op = abd_oprnd1;
1429 unprom[1].type = TREE_TYPE (abd_oprnd1);
1430 }
1431 else
1432 return NULL;
1433
1434 half_type = unprom[0].type;
1435 }
1436 else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
1437 unprom, NULL))
1438 return NULL;
1439
1440 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1441
1442 tree half_vectype;
1443 if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1444 type_out, &half_vectype))
1445 return NULL;
1446
1447 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1448 tree sad_oprnd[2];
1449 vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1450 unprom, half_vectype);
1451
1452 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1453 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1454 sad_oprnd[1], plus_oprnd1);
1455
1456 return pattern_stmt;
1457 }
1458
1459 /* Function vect_recog_abd_pattern
1460
1461 Try to find the following ABsolute Difference (ABD) or
1462 widening ABD (WIDEN_ABD) pattern:
1463
1464 TYPE1 x;
1465 TYPE2 y;
1466 TYPE3 x_cast = (TYPE3) x; // widening or no-op
1467 TYPE3 y_cast = (TYPE3) y; // widening or no-op
1468 TYPE3 diff = x_cast - y_cast;
1469 TYPE4 diff_cast = (TYPE4) diff; // widening or no-op
1470 TYPE5 abs = ABS(U)_EXPR <diff_cast>;
1471
1472 WIDEN_ABD exists to optimize the case where TYPE4 is at least
1473 twice as wide as TYPE3.
1474
1475 Input:
1476
1477 * STMT_VINFO: The stmt from which the pattern search begins
1478
1479 Output:
1480
1481 * TYPE_OUT: The type of the output of this pattern
1482
1483 * Return value: A new stmt that will be used to replace the sequence of
1484 stmts that constitute the pattern, principally:
1485 out = IFN_ABD (x, y)
1486 out = IFN_WIDEN_ABD (x, y)
1487 */
1488
1489 static gimple *
1490 vect_recog_abd_pattern (vec_info *vinfo,
1491 stmt_vec_info stmt_vinfo, tree *type_out)
1492 {
1493 gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1494 if (!last_stmt)
1495 return NULL;
1496
1497 tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1498
1499 vect_unpromoted_value unprom[2];
1500 gassign *diff_stmt;
1501 tree half_type;
1502 if (!vect_recog_absolute_difference (vinfo, last_stmt, &half_type,
1503 unprom, &diff_stmt))
1504 return NULL;
1505
1506 tree abd_in_type, abd_out_type;
1507
1508 if (half_type)
1509 {
1510 abd_in_type = half_type;
1511 abd_out_type = abd_in_type;
1512 }
1513 else
1514 {
1515 unprom[0].op = gimple_assign_rhs1 (diff_stmt);
1516 unprom[1].op = gimple_assign_rhs2 (diff_stmt);
1517 abd_in_type = signed_type_for (out_type);
1518 abd_out_type = abd_in_type;
1519 }
1520
1521 tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
1522 if (!vectype_in)
1523 return NULL;
1524
1525 internal_fn ifn = IFN_ABD;
1526 tree vectype_out = vectype_in;
1527
1528 if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
1529 && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
1530 {
1531 tree mid_type
1532 = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
1533 TYPE_UNSIGNED (abd_in_type));
1534 tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
1535
1536 code_helper dummy_code;
1537 int dummy_int;
1538 auto_vec<tree> dummy_vec;
1539 if (mid_vectype
1540 && supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD,
1541 stmt_vinfo, mid_vectype,
1542 vectype_in,
1543 &dummy_code, &dummy_code,
1544 &dummy_int, &dummy_vec))
1545 {
1546 ifn = IFN_VEC_WIDEN_ABD;
1547 abd_out_type = mid_type;
1548 vectype_out = mid_vectype;
1549 }
1550 }
1551
1552 if (ifn == IFN_ABD
1553 && !direct_internal_fn_supported_p (ifn, vectype_in,
1554 OPTIMIZE_FOR_SPEED))
1555 return NULL;
1556
1557 vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
1558
1559 tree abd_oprnds[2];
1560 vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
1561 abd_in_type, unprom, vectype_in);
1562
1563 *type_out = get_vectype_for_scalar_type (vinfo, out_type);
1564
1565 tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
1566 gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
1567 abd_oprnds[0], abd_oprnds[1]);
1568 gimple_call_set_lhs (abd_stmt, abd_result);
1569 gimple_set_location (abd_stmt, gimple_location (last_stmt));
1570
1571 gimple *stmt = abd_stmt;
1572 if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
1573 && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
1574 && !TYPE_UNSIGNED (abd_out_type))
1575 {
1576 tree unsign = unsigned_type_for (abd_out_type);
1577 tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign);
1578 stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt,
1579 unsign_vectype);
1580 }
1581
1582 return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
1583 }
1584
1585 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1586 so that it can be treated as though it had the form:
1587
1588 A_TYPE a;
1589 B_TYPE b;
1590 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1591 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1592 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1593 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1594 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1595
1596 Try to replace the pattern with:
1597
1598 A_TYPE a;
1599 B_TYPE b;
1600 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1601 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1602 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1603 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1604
1605 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1606
1607 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1608 name of the pattern being matched, for dump purposes. */
1609
1610 static gimple *
1611 vect_recog_widen_op_pattern (vec_info *vinfo,
1612 stmt_vec_info last_stmt_info, tree *type_out,
1613 tree_code orig_code, code_helper wide_code,
1614 bool shift_p, const char *name)
1615 {
1616 gimple *last_stmt = last_stmt_info->stmt;
1617
1618 vect_unpromoted_value unprom[2];
1619 tree half_type;
1620 if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1621 shift_p, 2, unprom, &half_type))
1622
1623 return NULL;
1624
1625 /* Pattern detected. */
1626 vect_pattern_detected (name, last_stmt);
1627
1628 tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1629 tree itype = type;
1630 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1631 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1632 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1633 TYPE_UNSIGNED (half_type));
1634
1635 /* Check target support */
1636 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1637 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1638 tree ctype = itype;
1639 tree vecctype = vecitype;
1640 if (orig_code == MINUS_EXPR
1641 && TYPE_UNSIGNED (itype)
1642 && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1643 {
1644 /* Subtraction is special, even if half_type is unsigned and no matter
1645 whether type is signed or unsigned, if type is wider than itype,
1646 we need to sign-extend from the widening operation result to the
1647 result type.
1648 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1649 itype unsigned short and type either int or unsigned int.
1650 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1651 (unsigned short) 0xffff, but for type int we want the result -1
1652 and for type unsigned int 0xffffffff rather than 0xffff. */
1653 ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1654 vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1655 }
1656
1657 code_helper dummy_code;
1658 int dummy_int;
1659 auto_vec<tree> dummy_vec;
1660 if (!vectype
1661 || !vecitype
1662 || !vecctype
1663 || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1664 vecitype, vectype,
1665 &dummy_code, &dummy_code,
1666 &dummy_int, &dummy_vec))
1667 return NULL;
1668
1669 *type_out = get_vectype_for_scalar_type (vinfo, type);
1670 if (!*type_out)
1671 return NULL;
1672
1673 tree oprnd[2];
1674 vect_convert_inputs (vinfo, last_stmt_info,
1675 2, oprnd, half_type, unprom, vectype);
1676
1677 tree var = vect_recog_temp_ssa_var (itype, NULL);
1678 gimple *pattern_stmt = vect_gimple_build (var, wide_code, oprnd[0], oprnd[1]);
1679
1680 if (vecctype != vecitype)
1681 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
1682 pattern_stmt, vecitype);
1683
1684 return vect_convert_output (vinfo, last_stmt_info,
1685 type, pattern_stmt, vecctype);
1686 }
1687
1688 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1689 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1690
1691 static gimple *
1692 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1693 tree *type_out)
1694 {
1695 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1696 MULT_EXPR, WIDEN_MULT_EXPR, false,
1697 "vect_recog_widen_mult_pattern");
1698 }
1699
1700 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1701 to IFN_VEC_WIDEN_PLUS. See vect_recog_widen_op_pattern for details. */
1702
1703 static gimple *
1704 vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1705 tree *type_out)
1706 {
1707 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1708 PLUS_EXPR, IFN_VEC_WIDEN_PLUS,
1709 false, "vect_recog_widen_plus_pattern");
1710 }
1711
1712 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1713 to IFN_VEC_WIDEN_MINUS. See vect_recog_widen_op_pattern for details. */
1714 static gimple *
1715 vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1716 tree *type_out)
1717 {
1718 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1719 MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
1720 false, "vect_recog_widen_minus_pattern");
1721 }
1722
1723 /* Try to detect abd on widened inputs, converting IFN_ABD
1724 to IFN_VEC_WIDEN_ABD. */
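
/* A minimal sketch (illustrative only) of the source form involved,
   assuming the target has a widening absolute-difference instruction:

     unsigned char a[N], b[N];
     unsigned short d[N];
     for (int i = 0; i < N; i++)
       d[i] = a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];

   The inner absolute difference is first recognized as IFN_ABD on
   unsigned char; widening its result to unsigned short is then folded
   into a single IFN_VEC_WIDEN_ABD.  */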
1725 static gimple *
1726 vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1727 tree *type_out)
1728 {
1729 gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1730 if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
1731 return NULL;
1732
1733 tree last_rhs = gimple_assign_rhs1 (last_stmt);
1734
1735 tree in_type = TREE_TYPE (last_rhs);
1736 tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1737 if (!INTEGRAL_TYPE_P (in_type)
1738 || !INTEGRAL_TYPE_P (out_type)
1739 || TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
1740 || !TYPE_UNSIGNED (in_type))
1741 return NULL;
1742
1743 vect_unpromoted_value unprom;
1744 tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
1745 if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
1746 return NULL;
1747
1748 stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
1749 if (!abd_pattern_vinfo)
1750 return NULL;
1751
1752 abd_pattern_vinfo = vect_stmt_to_vectorize (abd_pattern_vinfo);
1753 gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
1754 if (!abd_stmt
1755 || !gimple_call_internal_p (abd_stmt)
1756 || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
1757 return NULL;
1758
1759 tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
1760 tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
1761
1762 code_helper dummy_code;
1763 int dummy_int;
1764 auto_vec<tree> dummy_vec;
1765 if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
1766 vectype_out, vectype_in,
1767 &dummy_code, &dummy_code,
1768 &dummy_int, &dummy_vec))
1769 return NULL;
1770
1771 vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
1772
1773 *type_out = vectype_out;
1774
1775 tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
1776 tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
1777 tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
1778 gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
1779 abd_oprnd0, abd_oprnd1);
1780 gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
1781 gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
1782 return widen_abd_stmt;
1783 }
1784
1785 /* Function vect_recog_ctz_ffs_pattern
1786
1787 Try to find the following pattern:
1788
1789 TYPE1 A;
1790 TYPE1 B;
1791
1792 B = __builtin_ctz{,l,ll} (A);
1793
1794 or
1795
1796 B = __builtin_ffs{,l,ll} (A);
1797
1798 Input:
1799
1800 * STMT_VINFO: The stmt from which the pattern search begins.
1801 here it starts with B = __builtin_* (A);
1802
1803 Output:
1804
1805 * TYPE_OUT: The vector type of the output of this pattern.
1806
1807 * Return value: A new stmt that will be used to replace the sequence of
1808 stmts that constitute the pattern, using clz or popcount builtins. */
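
/* For example (a sketch, assuming the target only provides a vector
   popcount), a loop such as

     int a[N], b[N];
     for (int i = 0; i < N; i++)
       b[i] = __builtin_ctz (a[i]);

   can be rewritten using the identity
     .CTZ (X) = PREC - .POPCOUNT (X | -X)
   so that only the vector popcount instruction is needed.  */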
1809
1810 static gimple *
1811 vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1812 tree *type_out)
1813 {
1814 gimple *call_stmt = stmt_vinfo->stmt;
1815 gimple *pattern_stmt;
1816 tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type;
1817 tree new_var;
1818 internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST;
1819 bool defined_at_zero = true, defined_at_zero_new = false;
1820 int val = 0, val_new = 0;
1821 int prec;
1822 int sub = 0, add = 0;
1823 location_t loc;
1824
1825 if (!is_gimple_call (call_stmt))
1826 return NULL;
1827
1828 if (gimple_call_num_args (call_stmt) != 1)
1829 return NULL;
1830
1831 rhs_oprnd = gimple_call_arg (call_stmt, 0);
1832 rhs_type = TREE_TYPE (rhs_oprnd);
1833 lhs_oprnd = gimple_call_lhs (call_stmt);
1834 if (!lhs_oprnd)
1835 return NULL;
1836 lhs_type = TREE_TYPE (lhs_oprnd);
1837 if (!INTEGRAL_TYPE_P (lhs_type)
1838 || !INTEGRAL_TYPE_P (rhs_type)
1839 || !type_has_mode_precision_p (rhs_type)
1840 || TREE_CODE (rhs_oprnd) != SSA_NAME)
1841 return NULL;
1842
1843 switch (gimple_call_combined_fn (call_stmt))
1844 {
1845 CASE_CFN_CTZ:
1846 ifn = IFN_CTZ;
1847 if (!gimple_call_internal_p (call_stmt)
1848 || CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1849 val) != 2)
1850 defined_at_zero = false;
1851 break;
1852 CASE_CFN_FFS:
1853 ifn = IFN_FFS;
1854 break;
1855 default:
1856 return NULL;
1857 }
1858
1859 prec = TYPE_PRECISION (rhs_type);
1860 loc = gimple_location (call_stmt);
1861
1862 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1863 if (!vec_type)
1864 return NULL;
1865
1866 vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1867 if (!vec_rhs_type)
1868 return NULL;
1869
1870 /* Do it only if the backend doesn't have ctz<vector_mode>2 or
1871 ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
1872 popcount<vector_mode>2. */
1873 if (!vec_type
1874 || direct_internal_fn_supported_p (ifn, vec_rhs_type,
1875 OPTIMIZE_FOR_SPEED))
1876 return NULL;
1877
1878 if (ifn == IFN_FFS
1879 && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type,
1880 OPTIMIZE_FOR_SPEED))
1881 {
1882 ifnnew = IFN_CTZ;
1883 defined_at_zero_new
1884 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1885 val_new) == 2;
1886 }
1887 else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type,
1888 OPTIMIZE_FOR_SPEED))
1889 {
1890 ifnnew = IFN_CLZ;
1891 defined_at_zero_new
1892 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1893 val_new) == 2;
1894 }
1895 if ((ifnnew == IFN_LAST
1896 || (defined_at_zero && !defined_at_zero_new))
1897 && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type,
1898 OPTIMIZE_FOR_SPEED))
1899 {
1900 ifnnew = IFN_POPCOUNT;
1901 defined_at_zero_new = true;
1902 val_new = prec;
1903 }
1904 if (ifnnew == IFN_LAST)
1905 return NULL;
1906
1907 vect_pattern_detected ("vect_recog_ctz_ffs_pattern", call_stmt);
1908
1909 if ((ifnnew == IFN_CLZ
1910 && defined_at_zero
1911 && defined_at_zero_new
1912 && val == prec
1913 && val_new == prec)
1914 || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ))
1915 {
1916 /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
1917 .CTZ (X) = .POPCOUNT ((X - 1) & ~X). */
1918 if (ifnnew == IFN_CLZ)
1919 sub = prec;
1920 val_new = prec;
1921
1922 if (!TYPE_UNSIGNED (rhs_type))
1923 {
1924 rhs_type = unsigned_type_for (rhs_type);
1925 vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1926 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1927 pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd);
1928 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
1929 vec_rhs_type);
1930 rhs_oprnd = new_var;
1931 }
1932
1933 tree m1 = vect_recog_temp_ssa_var (rhs_type, NULL);
1934 pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd,
1935 build_int_cst (rhs_type, -1));
1936 gimple_set_location (pattern_stmt, loc);
1937 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1938
1939 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1940 pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd);
1941 gimple_set_location (pattern_stmt, loc);
1942 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1943 rhs_oprnd = new_var;
1944
1945 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1946 pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1947 m1, rhs_oprnd);
1948 gimple_set_location (pattern_stmt, loc);
1949 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1950 rhs_oprnd = new_var;
1951 }
1952 else if (ifnnew == IFN_CLZ)
1953 {
1954 /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
1955 .FFS (X) = PREC - .CLZ (X & -X). */
1956 sub = prec - (ifn == IFN_CTZ);
1957 val_new = sub - val_new;
1958
1959 tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
1960 pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
1961 gimple_set_location (pattern_stmt, loc);
1962 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1963
1964 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1965 pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1966 rhs_oprnd, neg);
1967 gimple_set_location (pattern_stmt, loc);
1968 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1969 rhs_oprnd = new_var;
1970 }
1971 else if (ifnnew == IFN_POPCOUNT)
1972 {
1973 /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
1974 .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X). */
1975 sub = prec + (ifn == IFN_FFS);
1976 val_new = sub;
1977
1978 tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
1979 pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
1980 gimple_set_location (pattern_stmt, loc);
1981 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1982
1983 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1984 pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR,
1985 rhs_oprnd, neg);
1986 gimple_set_location (pattern_stmt, loc);
1987 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1988 rhs_oprnd = new_var;
1989 }
1990 else if (ifnnew == IFN_CTZ)
1991 {
1992 /* .FFS (X) = .CTZ (X) + 1. */
1993 add = 1;
1994 val_new++;
1995 }
1996
1997 /* Create B = .IFNNEW (A). */
1998 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1999 pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
2000 gimple_call_set_lhs (pattern_stmt, new_var);
2001 gimple_set_location (pattern_stmt, loc);
2002 *type_out = vec_type;
2003
2004 if (sub)
2005 {
2006 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2007 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2008 pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR,
2009 build_int_cst (lhs_type, sub),
2010 new_var);
2011 gimple_set_location (pattern_stmt, loc);
2012 new_var = ret_var;
2013 }
2014 else if (add)
2015 {
2016 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2017 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2018 pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2019 build_int_cst (lhs_type, add));
2020 gimple_set_location (pattern_stmt, loc);
2021 new_var = ret_var;
2022 }
2023
2024 if (defined_at_zero
2025 && (!defined_at_zero_new || val != val_new))
2026 {
2027 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2028 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2029 rhs_oprnd = gimple_call_arg (call_stmt, 0);
2030 rhs_type = TREE_TYPE (rhs_oprnd);
2031 tree cmp = build2_loc (loc, NE_EXPR, boolean_type_node,
2032 rhs_oprnd, build_zero_cst (rhs_type));
2033 pattern_stmt = gimple_build_assign (ret_var, COND_EXPR, cmp,
2034 new_var,
2035 build_int_cst (lhs_type, val));
2036 }
2037
2038 if (dump_enabled_p ())
2039 dump_printf_loc (MSG_NOTE, vect_location,
2040 "created pattern stmt: %G", pattern_stmt);
2041
2042 return pattern_stmt;
2043 }
2044
2045 /* Function vect_recog_popcount_clz_ctz_ffs_pattern
2046
2047 Try to find the following pattern:
2048
2049 UTYPE1 A;
2050 TYPE1 B;
2051 UTYPE2 temp_in;
2052 TYPE3 temp_out;
2053 temp_in = (UTYPE2)A;
2054
2055 temp_out = __builtin_popcount{,l,ll} (temp_in);
2056 B = (TYPE1) temp_out;
2057
2058 UTYPE2 may or may not be equal to TYPE3;
2059 e.g. UTYPE2 is equal to TYPE3 for __builtin_popcount,
2060 but UTYPE2 is not equal to TYPE3 for __builtin_popcountll.
2061
2062 Input:
2063
2064 * STMT_VINFO: The stmt from which the pattern search begins.
2065 here it starts with B = (TYPE1) temp_out;
2066
2067 Output:
2068
2069 * TYPE_OUT: The vector type of the output of this pattern.
2070
2071 * Return value: A new stmt that will be used to replace the sequence of
2072 stmts that constitute the pattern. In this case it will be:
2073 B = .POPCOUNT (A);
2074
2075 Similarly for clz, ctz and ffs.
2076 */
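
/* An illustrative instance of the popcount case, assuming the target
   provides a vector popcount for 32-bit elements:

     unsigned int a[N], b[N];
     for (int i = 0; i < N; i++)
       b[i] = __builtin_popcountll (a[i]);

   Here temp_in is the zero-extension of a[i] to unsigned long long and
   temp_out is the int result; both conversions disappear and the loop
   body becomes b[i] = .POPCOUNT (a[i]) on 32-bit elements.  */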
2077
2078 static gimple *
2079 vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
2080 stmt_vec_info stmt_vinfo,
2081 tree *type_out)
2082 {
2083 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
2084 gimple *call_stmt, *pattern_stmt;
2085 tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
2086 internal_fn ifn = IFN_LAST;
2087 int addend = 0;
2088
2089 /* Find B = (TYPE1) temp_out. */
2090 if (!last_stmt)
2091 return NULL;
2092 tree_code code = gimple_assign_rhs_code (last_stmt);
2093 if (!CONVERT_EXPR_CODE_P (code))
2094 return NULL;
2095
2096 lhs_oprnd = gimple_assign_lhs (last_stmt);
2097 lhs_type = TREE_TYPE (lhs_oprnd);
2098 if (!INTEGRAL_TYPE_P (lhs_type))
2099 return NULL;
2100
2101 rhs_oprnd = gimple_assign_rhs1 (last_stmt);
2102 if (TREE_CODE (rhs_oprnd) != SSA_NAME
2103 || !has_single_use (rhs_oprnd))
2104 return NULL;
2105 call_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
2106
2107 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
2108 if (!is_gimple_call (call_stmt))
2109 return NULL;
2110 switch (gimple_call_combined_fn (call_stmt))
2111 {
2112 int val;
2113 CASE_CFN_POPCOUNT:
2114 ifn = IFN_POPCOUNT;
2115 break;
2116 CASE_CFN_CLZ:
2117 ifn = IFN_CLZ;
2118 /* Punt if call result is unsigned and defined value at zero
2119 is negative, as the negative value doesn't extend correctly. */
2120 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2121 && gimple_call_internal_p (call_stmt)
2122 && CLZ_DEFINED_VALUE_AT_ZERO
2123 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2124 && val < 0)
2125 return NULL;
2126 break;
2127 CASE_CFN_CTZ:
2128 ifn = IFN_CTZ;
2129 /* Punt if call result is unsigned and defined value at zero
2130 is negative, as the negative value doesn't extend correctly. */
2131 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2132 && gimple_call_internal_p (call_stmt)
2133 && CTZ_DEFINED_VALUE_AT_ZERO
2134 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2135 && val < 0)
2136 return NULL;
2137 break;
2138 CASE_CFN_FFS:
2139 ifn = IFN_FFS;
2140 break;
2141 default:
2142 return NULL;
2143 }
2144
2145 if (gimple_call_num_args (call_stmt) != 1)
2146 return NULL;
2147
2148 rhs_oprnd = gimple_call_arg (call_stmt, 0);
2149 vect_unpromoted_value unprom_diff;
2150 rhs_origin
2151 = vect_look_through_possible_promotion (vinfo, rhs_oprnd, &unprom_diff);
2152
2153 if (!rhs_origin)
2154 return NULL;
2155
2156 /* Input and output of .POPCOUNT should be same-precision integer. */
2157 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type))
2158 return NULL;
2159
2160 /* Also, A should be unsigned or have the same precision as temp_in;
2161 otherwise different builtins/internal functions have different behaviors. */
2162 if (TYPE_PRECISION (unprom_diff.type)
2163 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))
2164 switch (ifn)
2165 {
2166 case IFN_POPCOUNT:
2167 /* For popcount require zero extension, which doesn't add any
2168 further bits to the count. */
2169 if (!TYPE_UNSIGNED (unprom_diff.type))
2170 return NULL;
2171 break;
2172 case IFN_CLZ:
2173 /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
2174 if it is undefined at zero or if it matches also for the
2175 defined value there. */
2176 if (!TYPE_UNSIGNED (unprom_diff.type))
2177 return NULL;
2178 if (!type_has_mode_precision_p (lhs_type)
2179 || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd)))
2180 return NULL;
2181 addend = (TYPE_PRECISION (TREE_TYPE (rhs_oprnd))
2182 - TYPE_PRECISION (lhs_type));
2183 if (gimple_call_internal_p (call_stmt))
2184 {
2185 int val1, val2;
2186 int d1
2187 = CLZ_DEFINED_VALUE_AT_ZERO
2188 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val1);
2189 int d2
2190 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2191 val2);
2192 if (d1 != 2)
2193 break;
2194 if (d2 != 2 || val1 != val2 + addend)
2195 return NULL;
2196 }
2197 break;
2198 case IFN_CTZ:
2199 /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
2200 if it is undefined at zero or if it matches also for the
2201 defined value there. */
2202 if (gimple_call_internal_p (call_stmt))
2203 {
2204 int val1, val2;
2205 int d1
2206 = CTZ_DEFINED_VALUE_AT_ZERO
2207 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val1);
2208 int d2
2209 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2210 val2);
2211 if (d1 != 2)
2212 break;
2213 if (d2 != 2 || val1 != val2)
2214 return NULL;
2215 }
2216 break;
2217 case IFN_FFS:
2218 /* ffsll (x) == ffs (x) for unsigned or signed x. */
2219 break;
2220 default:
2221 gcc_unreachable ();
2222 }
2223
2224 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
2225 /* Do it only if the backend has popcount<vector_mode>2 etc. pattern. */
2226 if (!vec_type)
2227 return NULL;
2228
2229 bool supported
2230 = direct_internal_fn_supported_p (ifn, vec_type, OPTIMIZE_FOR_SPEED);
2231 if (!supported)
2232 switch (ifn)
2233 {
2234 case IFN_POPCOUNT:
2235 case IFN_CLZ:
2236 return NULL;
2237 case IFN_FFS:
2238 /* vect_recog_ctz_ffs_pattern can implement ffs using ctz. */
2239 if (direct_internal_fn_supported_p (IFN_CTZ, vec_type,
2240 OPTIMIZE_FOR_SPEED))
2241 break;
2242 /* FALLTHRU */
2243 case IFN_CTZ:
2244 /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
2245 clz or popcount. */
2246 if (direct_internal_fn_supported_p (IFN_CLZ, vec_type,
2247 OPTIMIZE_FOR_SPEED))
2248 break;
2249 if (direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
2250 OPTIMIZE_FOR_SPEED))
2251 break;
2252 return NULL;
2253 default:
2254 gcc_unreachable ();
2255 }
2256
2257 vect_pattern_detected ("vect_recog_popcount_clz_ctz_ffs_pattern",
2258 call_stmt);
2259
2260 /* Create B = .POPCOUNT (A). */
2261 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2262 pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op);
2263 gimple_call_set_lhs (pattern_stmt, new_var);
2264 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2265 *type_out = vec_type;
2266
2267 if (dump_enabled_p ())
2268 dump_printf_loc (MSG_NOTE, vect_location,
2269 "created pattern stmt: %G", pattern_stmt);
2270
2271 if (addend)
2272 {
2273 gcc_assert (supported);
2274 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2275 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2276 pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2277 build_int_cst (lhs_type, addend));
2278 }
2279 else if (!supported)
2280 {
2281 stmt_vec_info new_stmt_info = vinfo->add_stmt (pattern_stmt);
2282 STMT_VINFO_VECTYPE (new_stmt_info) = vec_type;
2283 pattern_stmt
2284 = vect_recog_ctz_ffs_pattern (vinfo, new_stmt_info, type_out);
2285 if (pattern_stmt == NULL)
2286 return NULL;
2287 if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info))
2288 {
2289 gimple_seq *pseq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
2290 gimple_seq_add_seq_without_update (pseq, seq);
2291 }
2292 }
2293 return pattern_stmt;
2294 }
2295
2296 /* Function vect_recog_pow_pattern
2297
2298 Try to find the following pattern:
2299
2300 x = POW (y, N);
2301
2302 with POW being one of pow, powf, powi, powif and N being
2303 either 2 or 0.5.
2304
2305 Input:
2306
2307 * STMT_VINFO: The stmt from which the pattern search begins.
2308
2309 Output:
2310
2311 * TYPE_OUT: The type of the output of this pattern.
2312
2313 * Return value: A new stmt that will be used to replace the sequence of
2314 stmts that constitute the pattern. In this case it will be:
2315 x = x * x
2316 or
2317 x = sqrt (x)
2318 */
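
/* A short illustration: given

     double x[N], y[N];
     for (int i = 0; i < N; i++)
       y[i] = pow (x[i], 2.0);

   the call is replaced by y[i] = x[i] * x[i]; likewise pow (x[i], 0.5)
   becomes a vector sqrt when IFN_SQRT is supported for the chosen
   vector type.  */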
2319
2320 static gimple *
2321 vect_recog_pow_pattern (vec_info *vinfo,
2322 stmt_vec_info stmt_vinfo, tree *type_out)
2323 {
2324 gimple *last_stmt = stmt_vinfo->stmt;
2325 tree base, exp;
2326 gimple *stmt;
2327 tree var;
2328
2329 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
2330 return NULL;
2331
2332 switch (gimple_call_combined_fn (last_stmt))
2333 {
2334 CASE_CFN_POW:
2335 CASE_CFN_POWI:
2336 break;
2337
2338 default:
2339 return NULL;
2340 }
2341
2342 base = gimple_call_arg (last_stmt, 0);
2343 exp = gimple_call_arg (last_stmt, 1);
2344 if (TREE_CODE (exp) != REAL_CST
2345 && TREE_CODE (exp) != INTEGER_CST)
2346 {
2347 if (flag_unsafe_math_optimizations
2348 && TREE_CODE (base) == REAL_CST
2349 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
2350 {
2351 combined_fn log_cfn;
2352 built_in_function exp_bfn;
2353 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
2354 {
2355 case BUILT_IN_POW:
2356 log_cfn = CFN_BUILT_IN_LOG;
2357 exp_bfn = BUILT_IN_EXP;
2358 break;
2359 case BUILT_IN_POWF:
2360 log_cfn = CFN_BUILT_IN_LOGF;
2361 exp_bfn = BUILT_IN_EXPF;
2362 break;
2363 case BUILT_IN_POWL:
2364 log_cfn = CFN_BUILT_IN_LOGL;
2365 exp_bfn = BUILT_IN_EXPL;
2366 break;
2367 default:
2368 return NULL;
2369 }
2370 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
2371 tree exp_decl = builtin_decl_implicit (exp_bfn);
2372 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
2373 does that, but if C is a power of 2, the non-vectorized version
2374 prefers exp2 (log2 (C) * x), and for vectorization we don't have
2375 a vectorized exp2, so handle it here. */
2376 if (logc
2377 && TREE_CODE (logc) == REAL_CST
2378 && exp_decl
2379 && lookup_attribute ("omp declare simd",
2380 DECL_ATTRIBUTES (exp_decl)))
2381 {
2382 cgraph_node *node = cgraph_node::get_create (exp_decl);
2383 if (node->simd_clones == NULL)
2384 {
2385 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
2386 || node->definition)
2387 return NULL;
2388 expand_simd_clones (node);
2389 if (node->simd_clones == NULL)
2390 return NULL;
2391 }
2392 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2393 if (!*type_out)
2394 return NULL;
2395 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2396 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
2397 append_pattern_def_seq (vinfo, stmt_vinfo, g);
2398 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2399 g = gimple_build_call (exp_decl, 1, def);
2400 gimple_call_set_lhs (g, res);
2401 return g;
2402 }
2403 }
2404
2405 return NULL;
2406 }
2407
2408 /* We now have a pow or powi builtin function call with a constant
2409 exponent. */
2410
2411 /* Catch squaring. */
2412 if ((tree_fits_shwi_p (exp)
2413 && tree_to_shwi (exp) == 2)
2414 || (TREE_CODE (exp) == REAL_CST
2415 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
2416 {
2417 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
2418 TREE_TYPE (base), type_out))
2419 return NULL;
2420
2421 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2422 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
2423 return stmt;
2424 }
2425
2426 /* Catch square root. */
2427 if (TREE_CODE (exp) == REAL_CST
2428 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
2429 {
2430 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2431 if (*type_out
2432 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
2433 OPTIMIZE_FOR_SPEED))
2434 {
2435 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
2436 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
2437 gimple_call_set_lhs (stmt, var);
2438 gimple_call_set_nothrow (stmt, true);
2439 return stmt;
2440 }
2441 }
2442
2443 return NULL;
2444 }
2445
2446
2447 /* Function vect_recog_widen_sum_pattern
2448
2449 Try to find the following pattern:
2450
2451 type x_t;
2452 TYPE x_T, sum = init;
2453 loop:
2454 sum_0 = phi <init, sum_1>
2455 S1 x_t = *p;
2456 S2 x_T = (TYPE) x_t;
2457 S3 sum_1 = x_T + sum_0;
2458
2459 where type 'TYPE' is at least double the size of type 'type', i.e - we're
2460 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
2461 a special case of a reduction computation.
2462
2463 Input:
2464
2465 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
2466 when this function is called with S3, the pattern {S2,S3} will be detected.
2467
2468 Output:
2469
2470 * TYPE_OUT: The type of the output of this pattern.
2471
2472 * Return value: A new stmt that will be used to replace the sequence of
2473 stmts that constitute the pattern. In this case it will be:
2474 WIDEN_SUM <x_t, sum_0>
2475
2476 Note: The widening-sum idiom is a widening reduction pattern that is
2477 vectorized without preserving all the intermediate results. It
2478 produces only N/2 (widened) results (by summing up pairs of
2479 intermediate results) rather than all N results. Therefore, we
2480 cannot allow this pattern when we want to get all the results and in
2481 the correct order (as is the case when this computation is in an
2482 inner-loop nested in an outer-loop that is being vectorized). */
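
/* A typical source form (for illustration), assuming the target
   provides a widening-sum operation:

     unsigned char x[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += x[i];

   corresponding to S1-S3 above with type unsigned char and TYPE int.  */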
2483
2484 static gimple *
2485 vect_recog_widen_sum_pattern (vec_info *vinfo,
2486 stmt_vec_info stmt_vinfo, tree *type_out)
2487 {
2488 gimple *last_stmt = stmt_vinfo->stmt;
2489 tree oprnd0, oprnd1;
2490 tree type;
2491 gimple *pattern_stmt;
2492 tree var;
2493
2494 /* Look for the following pattern
2495 DX = (TYPE) X;
2496 sum_1 = DX + sum_0;
2497 In which DX is at least double the size of X, and sum_1 has been
2498 recognized as a reduction variable.
2499 */
2500
2501 /* Starting from LAST_STMT, follow the defs of its uses in search
2502 of the above pattern. */
2503
2504 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
2505 &oprnd0, &oprnd1)
2506 || TREE_CODE (oprnd0) != SSA_NAME
2507 || !vinfo->lookup_def (oprnd0))
2508 return NULL;
2509
2510 type = TREE_TYPE (gimple_get_lhs (last_stmt));
2511
2512 /* So far so good. Since last_stmt was detected as a (summation) reduction,
2513 we know that oprnd1 is the reduction variable (defined by a loop-header
2514 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
2515 Left to check that oprnd0 is defined by a cast from type 'type' to type
2516 'TYPE'. */
2517
2518 vect_unpromoted_value unprom0;
2519 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
2520 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
2521 return NULL;
2522
2523 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
2524
2525 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
2526 unprom0.type, type_out))
2527 return NULL;
2528
2529 var = vect_recog_temp_ssa_var (type, NULL);
2530 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
2531
2532 return pattern_stmt;
2533 }
2534
2535 /* Function vect_recog_bitfield_ref_pattern
2536
2537 Try to find the following pattern:
2538
2539 bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
2540 result = (type_out) bf_value;
2541
2542 where type_out is a non-bitfield type, that is to say, its precision matches
2543 2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
2544
2545 Input:
2546
2547 * STMT_VINFO: The stmt from which the pattern search begins.
2548 here it starts with:
2549 result = (type_out) bf_value;
2550
2551 Output:
2552
2553 * TYPE_OUT: The vector type of the output of this pattern.
2554
2555 * Return value: A new stmt that will be used to replace the sequence of
2556 stmts that constitute the pattern. If the precision of type_out is bigger
2557 than the precision type of _1 we perform the widening before the shifting,
2558 since the new precision will be large enough to shift the value and moving
2559 widening operations up the statement chain enables the generation of
2560 widening loads. If we are widening and the operation after the pattern is
2561 an addition then we mask first and shift later, to enable the generation of
2562 shifting adds. In the case of narrowing we will always mask first, shift
2563 last and then perform a narrowing operation. This will enable the
2564 generation of narrowing shifts.
2565
2566 Widening with mask first, shift later:
2567 container = (type_out) container;
2568 masked = container & (((1 << bitsize) - 1) << bitpos);
2569 result = masked >> bitpos;
2570
2571 Widening with shift first, mask last:
2572 container = (type_out) container;
2573 shifted = container >> bitpos;
2574 result = shifted & ((1 << bitsize) - 1);
2575
2576 Narrowing:
2577 masked = container & (((1 << bitsize) - 1) << bitpos);
2578 result = masked >> bitpos;
2579 result = (type_out) result;
2580
2581 If the bitfield is signed and it's wider than type_out, we need to
2582 keep the result sign-extended:
2583 container = (type) container;
2584 masked = container << (prec - bitsize - bitpos);
2585 result = (type_out) (masked >> (prec - bitsize));
2586
2587 Here type is the signed variant of the wider of type_out and the type
2588 of container.
2589
2590 The shifting is always optional depending on whether bitpos != 0.
2591
2592 */
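
/* A sketch of the kind of access involved (the field layout is
   illustrative):

     struct S { unsigned int f : 7; unsigned int g : 25; };
     struct S s[N];
     int r[N];
     for (int i = 0; i < N; i++)
       r[i] = s[i].f;

   Once the bit-field read has been lowered to a BIT_FIELD_REF of the
   32-bit container plus a conversion, this pattern turns it into
   explicit shift and mask operations on the container.  */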
2593
2594 static gimple *
2595 vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2596 tree *type_out)
2597 {
2598 gassign *first_stmt = dyn_cast <gassign *> (stmt_info->stmt);
2599
2600 if (!first_stmt)
2601 return NULL;
2602
2603 gassign *bf_stmt;
2604 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (first_stmt))
2605 && TREE_CODE (gimple_assign_rhs1 (first_stmt)) == SSA_NAME)
2606 {
2607 gimple *second_stmt
2608 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (first_stmt));
2609 bf_stmt = dyn_cast <gassign *> (second_stmt);
2610 if (!bf_stmt
2611 || gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF)
2612 return NULL;
2613 }
2614 else
2615 return NULL;
2616
2617 tree bf_ref = gimple_assign_rhs1 (bf_stmt);
2618 tree container = TREE_OPERAND (bf_ref, 0);
2619
2620 if (!bit_field_offset (bf_ref).is_constant ()
2621 || !bit_field_size (bf_ref).is_constant ()
2622 || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container))))
2623 return NULL;
2624
2625 if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref))
2626 || !INTEGRAL_TYPE_P (TREE_TYPE (container))
2627 || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode)
2628 return NULL;
2629
2630 gimple *use_stmt, *pattern_stmt;
2631 use_operand_p use_p;
2632 tree ret = gimple_assign_lhs (first_stmt);
2633 tree ret_type = TREE_TYPE (ret);
2634 bool shift_first = true;
2635 tree container_type = TREE_TYPE (container);
2636 tree vectype = get_vectype_for_scalar_type (vinfo, container_type);
2637
2638 /* Calculate shift_n before the adjustments for widening loads, otherwise
2639 the container may change and we have to consider offset change for
2640 widening loads on big endianness. The shift_n calculated here can be
2641 independent of widening. */
2642 unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
2643 unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
2644 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2645 if (BYTES_BIG_ENDIAN)
2646 shift_n = prec - shift_n - mask_width;
2647
2648 bool ref_sext = (!TYPE_UNSIGNED (TREE_TYPE (bf_ref))
2649 && TYPE_PRECISION (ret_type) > mask_width);
2650 bool load_widen = (TYPE_PRECISION (TREE_TYPE (container))
2651 < TYPE_PRECISION (ret_type));
2652
2653 /* We move the conversion earlier if the loaded type is smaller than the
2654 return type to enable the use of widening loads. And if we need a
2655 sign extension, we need to convert the loaded value early to a signed
2656 type as well. */
2657 if (ref_sext || load_widen)
2658 {
2659 tree type = load_widen ? ret_type : container_type;
2660 if (ref_sext)
2661 type = gimple_signed_type (type);
2662 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type),
2663 NOP_EXPR, container);
2664 container = gimple_get_lhs (pattern_stmt);
2665 container_type = TREE_TYPE (container);
2666 prec = tree_to_uhwi (TYPE_SIZE (container_type));
2667 vectype = get_vectype_for_scalar_type (vinfo, container_type);
2668 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2669 }
2670 else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type))
2671 /* If we are doing the conversion last then also delay the shift as we may
2672 be able to combine the shift and conversion in certain cases. */
2673 shift_first = false;
2674
2675 /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
2676 PLUS_EXPR then do the shift last as some targets can combine the shift and
2677 add into a single instruction. */
2678 if (single_imm_use (gimple_assign_lhs (first_stmt), &use_p, &use_stmt))
2679 {
2680 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
2681 && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR)
2682 shift_first = false;
2683 }
2684
2685 /* If we don't have to shift we only generate the mask, so just fix the
2686 code-path to shift_first. */
2687 if (shift_n == 0)
2688 shift_first = true;
2689
2690 tree result;
2691 if (shift_first && !ref_sext)
2692 {
2693 tree shifted = container;
2694 if (shift_n)
2695 {
2696 pattern_stmt
2697 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2698 RSHIFT_EXPR, container,
2699 build_int_cst (sizetype, shift_n));
2700 shifted = gimple_assign_lhs (pattern_stmt);
2701 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2702 }
2703
2704 tree mask = wide_int_to_tree (container_type,
2705 wi::mask (mask_width, false, prec));
2706
2707 pattern_stmt
2708 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2709 BIT_AND_EXPR, shifted, mask);
2710 result = gimple_assign_lhs (pattern_stmt);
2711 }
2712 else
2713 {
2714 tree temp = vect_recog_temp_ssa_var (container_type);
2715 if (!ref_sext)
2716 {
2717 tree mask = wide_int_to_tree (container_type,
2718 wi::shifted_mask (shift_n,
2719 mask_width,
2720 false, prec));
2721 pattern_stmt = gimple_build_assign (temp, BIT_AND_EXPR,
2722 container, mask);
2723 }
2724 else
2725 {
2726 HOST_WIDE_INT shl = prec - shift_n - mask_width;
2727 shift_n += shl;
2728 pattern_stmt = gimple_build_assign (temp, LSHIFT_EXPR,
2729 container,
2730 build_int_cst (sizetype,
2731 shl));
2732 }
2733
2734 tree masked = gimple_assign_lhs (pattern_stmt);
2735 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2736 pattern_stmt
2737 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2738 RSHIFT_EXPR, masked,
2739 build_int_cst (sizetype, shift_n));
2740 result = gimple_assign_lhs (pattern_stmt);
2741 }
2742
2743 if (!useless_type_conversion_p (TREE_TYPE (result), ret_type))
2744 {
2745 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2746 pattern_stmt
2747 = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
2748 NOP_EXPR, result);
2749 }
2750
2751 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2752 vect_pattern_detected ("bitfield_ref pattern", stmt_info->stmt);
2753
2754 return pattern_stmt;
2755 }
2756
2757 /* Function vect_recog_bit_insert_pattern
2758
2759 Try to find the following pattern:
2760
2761 written = BIT_INSERT_EXPR (container, value, bitpos);
2762
2763 Input:
2764
2765 * STMT_VINFO: The stmt we want to replace.
2766
2767 Output:
2768
2769 * TYPE_OUT: The vector type of the output of this pattern.
2770
2771 * Return value: A new stmt that will be used to replace the sequence of
2772 stmts that constitute the pattern. In this case it will be:
2773 value = (container_type) value; // Make sure
2774 shifted = value << bitpos; // Shift value into place
2775 masked = shifted & (mask << bitpos); // Mask off the non-relevant bits in
2776 // the 'to-write value'.
2777 cleared = container & ~(mask << bitpos); // Clearing the bits we want to
2778 // write to from the value we want
2779 // to write to.
2780 written = cleared | masked; // Write bits.
2781
2782
2783 where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask to keep the number of
2784 bits corresponding to the real size of the bitfield value we are writing to.
2785 The shifting is always optional depending on whether bitpos != 0.
2786
2787 */
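
/* Continuing the illustrative struct from the bitfield_ref pattern, a
   bit-field write such as

     for (int i = 0; i < N; i++)
       s[i].f = v[i];

   is lowered to a BIT_INSERT_EXPR on the 32-bit container, which this
   pattern expands into the shift/mask/ior sequence shown above.  */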
2788
2789 static gimple *
2790 vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2791 tree *type_out)
2792 {
2793 gassign *bf_stmt = dyn_cast <gassign *> (stmt_info->stmt);
2794 if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_INSERT_EXPR)
2795 return NULL;
2796
2797 tree container = gimple_assign_rhs1 (bf_stmt);
2798 tree value = gimple_assign_rhs2 (bf_stmt);
2799 tree shift = gimple_assign_rhs3 (bf_stmt);
2800
2801 tree bf_type = TREE_TYPE (value);
2802 tree container_type = TREE_TYPE (container);
2803
2804 if (!INTEGRAL_TYPE_P (container_type)
2805 || !tree_fits_uhwi_p (TYPE_SIZE (container_type)))
2806 return NULL;
2807
2808 gimple *pattern_stmt;
2809
2810 vect_unpromoted_value unprom;
2811 unprom.set_op (value, vect_internal_def);
2812 value = vect_convert_input (vinfo, stmt_info, container_type, &unprom,
2813 get_vectype_for_scalar_type (vinfo,
2814 container_type));
2815
2816 unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);
2817 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2818 unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift);
2819 if (BYTES_BIG_ENDIAN)
2820 {
2821 shift_n = prec - shift_n - mask_width;
2822 shift = build_int_cst (TREE_TYPE (shift), shift_n);
2823 }
2824
2825 if (!useless_type_conversion_p (TREE_TYPE (value), container_type))
2826 {
2827 pattern_stmt =
2828 gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2829 NOP_EXPR, value);
2830 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2831 value = gimple_get_lhs (pattern_stmt);
2832 }
2833
2834 /* Shift VALUE into place. */
2835 tree shifted = value;
2836 if (shift_n)
2837 {
2838 gimple_seq stmts = NULL;
2839 shifted
2840 = gimple_build (&stmts, LSHIFT_EXPR, container_type, value, shift);
2841 if (!gimple_seq_empty_p (stmts))
2842 append_pattern_def_seq (vinfo, stmt_info,
2843 gimple_seq_first_stmt (stmts));
2844 }
2845
2846 tree mask_t
2847 = wide_int_to_tree (container_type,
2848 wi::shifted_mask (shift_n, mask_width, false, prec));
2849
2850 /* Clear bits we don't want to write back from SHIFTED. */
2851 gimple_seq stmts = NULL;
2852 tree masked = gimple_build (&stmts, BIT_AND_EXPR, container_type, shifted,
2853 mask_t);
2854 if (!gimple_seq_empty_p (stmts))
2855 {
2856 pattern_stmt = gimple_seq_first_stmt (stmts);
2857 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2858 }
2859
2860 /* Mask off the bits in the container that we are to write to. */
2861 mask_t = wide_int_to_tree (container_type,
2862 wi::shifted_mask (shift_n, mask_width, true, prec));
2863 tree cleared = vect_recog_temp_ssa_var (container_type);
2864 pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t);
2865 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2866
2867 /* Write MASKED into CLEARED. */
2868 pattern_stmt
2869 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2870 BIT_IOR_EXPR, cleared, masked);
2871
2872 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2873 vect_pattern_detected ("bit_insert pattern", stmt_info->stmt);
2874
2875 return pattern_stmt;
2876 }
2877
2878
2879 /* Recognize cases in which an operation is performed in one type WTYPE
2880 but could be done more efficiently in a narrower type NTYPE. For example,
2881 if we have:
2882
2883 ATYPE a; // narrower than NTYPE
2884 BTYPE b; // narrower than NTYPE
2885 WTYPE aw = (WTYPE) a;
2886 WTYPE bw = (WTYPE) b;
2887 WTYPE res = aw + bw; // only uses of aw and bw
2888
2889 then it would be more efficient to do:
2890
2891 NTYPE an = (NTYPE) a;
2892 NTYPE bn = (NTYPE) b;
2893 NTYPE resn = an + bn;
2894 WTYPE res = (WTYPE) resn;
2895
2896 Other situations include things like:
2897
2898 ATYPE a; // NTYPE or narrower
2899 WTYPE aw = (WTYPE) a;
2900 WTYPE res = aw + b;
2901
2902 when only "(NTYPE) res" is significant. In that case it's more efficient
2903 to truncate "b" and do the operation on NTYPE instead:
2904
2905 NTYPE an = (NTYPE) a;
2906 NTYPE bn = (NTYPE) b; // truncation
2907 NTYPE resn = an + bn;
2908 WTYPE res = (WTYPE) resn;
2909
2910 All users of "res" should then use "resn" instead, making the final
2911 statement dead (not marked as relevant). The final statement is still
2912 needed to maintain the type correctness of the IR.
2913
2914 vect_determine_precisions has already determined the minimum
2915 precision of the operation and the minimum precision required
2916 by users of the result. */
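
/* A small sketch of the kind of loop this helps:

     unsigned char a[N], b[N], c[N];
     for (int i = 0; i < N; i++)
       c[i] = a[i] + b[i];

   The addition is performed in int because of the C integer promotions,
   but only the low 8 bits of the result are stored, so the operation
   can be demoted to a narrower element type and more elements can be
   processed per vector statement.  */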
2917
2918 static gimple *
2919 vect_recog_over_widening_pattern (vec_info *vinfo,
2920 stmt_vec_info last_stmt_info, tree *type_out)
2921 {
2922 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2923 if (!last_stmt)
2924 return NULL;
2925
2926 /* See whether we have found that this operation can be done on a
2927 narrower type without changing its semantics. */
2928 unsigned int new_precision = last_stmt_info->operation_precision;
2929 if (!new_precision)
2930 return NULL;
2931
2932 tree lhs = gimple_assign_lhs (last_stmt);
2933 tree type = TREE_TYPE (lhs);
2934 tree_code code = gimple_assign_rhs_code (last_stmt);
2935
2936 /* Punt for reductions where we don't handle the type conversions. */
2937 if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
2938 return NULL;
2939
2940 /* Keep the first operand of a COND_EXPR as-is: only the other two
2941 operands are interesting. */
2942 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
2943
2944 /* Check the operands. */
2945 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
2946 auto_vec <vect_unpromoted_value, 3> unprom (nops);
2947 unprom.quick_grow (nops);
2948 unsigned int min_precision = 0;
2949 bool single_use_p = false;
2950 for (unsigned int i = 0; i < nops; ++i)
2951 {
2952 tree op = gimple_op (last_stmt, first_op + i);
2953 if (TREE_CODE (op) == INTEGER_CST)
2954 unprom[i].set_op (op, vect_constant_def);
2955 else if (TREE_CODE (op) == SSA_NAME)
2956 {
2957 bool op_single_use_p = true;
2958 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
2959 &op_single_use_p))
2960 return NULL;
2961 /* If:
2962
2963 (1) N bits of the result are needed;
2964 (2) all inputs are widened from M<N bits; and
2965 (3) one operand OP is a single-use SSA name
2966
2967 we can shift the M->N widening from OP to the output
2968 without changing the number or type of extensions involved.
2969 This then reduces the number of copies of STMT_INFO.
2970
2971 If instead of (3) more than one operand is a single-use SSA name,
2972 shifting the extension to the output is even more of a win.
2973
2974 If instead:
2975
2976 (1) N bits of the result are needed;
2977 (2) one operand OP2 is widened from M2<N bits;
2978 (3) another operand OP1 is widened from M1<M2 bits; and
2979 (4) both OP1 and OP2 are single-use
2980
2981 the choice is between:
2982
2983 (a) truncating OP2 to M1, doing the operation on M1,
2984 and then widening the result to N
2985
2986 (b) widening OP1 to M2, doing the operation on M2, and then
2987 widening the result to N
2988
2989 Both shift the M2->N widening of the inputs to the output.
2990 (a) additionally shifts the M1->M2 widening to the output;
2991 it requires fewer copies of STMT_INFO but requires an extra
2992 M2->M1 truncation.
2993
2994 Which is better will depend on the complexity and cost of
2995 STMT_INFO, which is hard to predict at this stage. However,
2996 a clear tie-breaker in favor of (b) is the fact that the
2997 truncation in (a) increases the length of the operation chain.
2998
2999 If instead of (4) only one of OP1 or OP2 is single-use,
3000 (b) is still a win over doing the operation in N bits:
3001 it still shifts the M2->N widening on the single-use operand
3002 to the output and reduces the number of STMT_INFO copies.
3003
3004 If neither operand is single-use then operating on fewer than
3005 N bits might lead to more extensions overall. Whether it does
3006 or not depends on global information about the vectorization
3007 region, and whether that's a good trade-off would again
3008 depend on the complexity and cost of the statements involved,
3009 as well as things like register pressure that are not normally
3010 modelled at this stage. We therefore ignore these cases
3011 and just optimize the clear single-use wins above.
3012
3013 Thus we take the maximum precision of the unpromoted operands
3014 and record whether any operand is single-use. */
3015 if (unprom[i].dt == vect_internal_def)
3016 {
3017 min_precision = MAX (min_precision,
3018 TYPE_PRECISION (unprom[i].type));
3019 single_use_p |= op_single_use_p;
3020 }
3021 }
3022 else
3023 return NULL;
3024 }
3025
3026 /* Although the operation could be done in operation_precision, we have
3027 to balance that against introducing extra truncations or extensions.
3028 Calculate the minimum precision that can be handled efficiently.
3029
3030 The loop above determined that the operation could be handled
3031 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
3032 extension from the inputs to the output without introducing more
3033 instructions, and would reduce the number of instructions required
3034 for STMT_INFO itself.
3035
3036 vect_determine_precisions has also determined that the result only
3037 needs min_output_precision bits. Truncating by a factor of N
3038 requires a tree of N - 1 instructions, so if TYPE is N times wider
3039 than min_output_precision, doing the operation in TYPE and truncating
3040 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
3041 In contrast:
3042
3043 - truncating the input to a unary operation and doing the operation
3044 in the new type requires at most N - 1 + 1 = N instructions per
3045 output vector
3046
3047 - doing the same for a binary operation requires at most
3048 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
3049
3050 Both unary and binary operations require fewer instructions than
3051 this if the operands were extended from a suitable truncated form.
3052 Thus there is usually nothing to lose by doing operations in
3053 min_output_precision bits, but there can be something to gain. */
3054 if (!single_use_p)
3055 min_precision = last_stmt_info->min_output_precision;
3056 else
3057 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
3058
3059 /* Apply the minimum efficient precision we just calculated. */
3060 if (new_precision < min_precision)
3061 new_precision = min_precision;
3062 new_precision = vect_element_precision (new_precision);
3063 if (new_precision >= TYPE_PRECISION (type))
3064 return NULL;
3065
3066 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
3067
3068 *type_out = get_vectype_for_scalar_type (vinfo, type);
3069 if (!*type_out)
3070 return NULL;
3071
3072 /* We've found a viable pattern. Get the new type of the operation. */
3073 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
3074 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
3075
3076 /* If we're truncating an operation, we need to make sure that we
3077 don't introduce new undefined overflow. The codes tested here are
3078 a subset of those accepted by vect_truncatable_operation_p. */
3079 tree op_type = new_type;
3080 if (TYPE_OVERFLOW_UNDEFINED (new_type)
3081 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
3082 op_type = build_nonstandard_integer_type (new_precision, true);
3083
3084 /* We specifically don't check here whether the target supports the
3085 new operation, since it might be something that a later pattern
3086 wants to rewrite anyway. If targets have a minimum element size
3087 for some optabs, we should pattern-match smaller ops to larger ops
3088 where beneficial. */
3089 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3090 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
3091 if (!new_vectype || !op_vectype)
3092 return NULL;
3093
3094 if (dump_enabled_p ())
3095 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
3096 type, new_type);
3097
3098 /* Calculate the rhs operands for an operation on OP_TYPE. */
3099 tree ops[3] = {};
3100 for (unsigned int i = 1; i < first_op; ++i)
3101 ops[i - 1] = gimple_op (last_stmt, i);
3102 vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
3103 op_type, &unprom[0], op_vectype);
3104
3105 /* Use the operation to produce a result of type OP_TYPE. */
3106 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
3107 gimple *pattern_stmt = gimple_build_assign (new_var, code,
3108 ops[0], ops[1], ops[2]);
3109 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3110
3111 if (dump_enabled_p ())
3112 dump_printf_loc (MSG_NOTE, vect_location,
3113 "created pattern stmt: %G", pattern_stmt);
3114
3115 /* Convert back to the original signedness, if OP_TYPE is different
3116 from NEW_TYPE. */
3117 if (op_type != new_type)
3118 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
3119 pattern_stmt, op_vectype);
3120
3121 /* Promote the result to the original type. */
3122 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
3123 pattern_stmt, new_vectype);
3124
3125 return pattern_stmt;
3126 }
3127
3128 /* Recognize the following patterns:
3129
3130 ATYPE a; // narrower than TYPE
3131 BTYPE b; // narrower than TYPE
3132
3133 1) Multiply high with scaling
3134 TYPE res = ((TYPE) a * (TYPE) b) >> c;
3135 Here, c is bitsize (TYPE) / 2 - 1.
3136
3137 2) ... or also with rounding
3138 TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1;
3139 Here, d is bitsize (TYPE) / 2 - 2.
3140
3141 3) Normal multiply high
3142 TYPE res = ((TYPE) a * (TYPE) b) >> e;
3143 Here, e is bitsize (TYPE) / 2.
3144
3145 where only the bottom half of res is used. */
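
/* An illustrative instance of form 2), assuming the target provides a
   vector rounding multiply-high (IFN_MULHRS) for 16-bit elements:

     short a[N], b[N], res[N];
     for (int i = 0; i < N; i++)
       res[i] = ((((int) a[i] * (int) b[i]) >> 14) + 1) >> 1;

   Here TYPE is int, d is 14, and only the low 16 bits of res are used,
   so the whole expression becomes res[i] = .MULHRS (a[i], b[i]).  */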
3146
3147 static gimple *
3148 vect_recog_mulhs_pattern (vec_info *vinfo,
3149 stmt_vec_info last_stmt_info, tree *type_out)
3150 {
3151 /* Check for a right shift. */
3152 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3153 if (!last_stmt
3154 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
3155 return NULL;
3156
3157 /* Check that the shift result is wider than the users of the
3158 result need (i.e. that narrowing would be a natural choice). */
3159 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
3160 unsigned int target_precision
3161 = vect_element_precision (last_stmt_info->min_output_precision);
3162 if (!INTEGRAL_TYPE_P (lhs_type)
3163 || target_precision >= TYPE_PRECISION (lhs_type))
3164 return NULL;
3165
3166 /* Look through any change in sign on the outer shift input. */
3167 vect_unpromoted_value unprom_rshift_input;
3168 tree rshift_input = vect_look_through_possible_promotion
3169 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
3170 if (!rshift_input
3171 || TYPE_PRECISION (TREE_TYPE (rshift_input))
3172 != TYPE_PRECISION (lhs_type))
3173 return NULL;
3174
3175 /* Get the definition of the shift input. */
3176 stmt_vec_info rshift_input_stmt_info
3177 = vect_get_internal_def (vinfo, rshift_input);
3178 if (!rshift_input_stmt_info)
3179 return NULL;
3180 gassign *rshift_input_stmt
3181 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
3182 if (!rshift_input_stmt)
3183 return NULL;
3184
3185 stmt_vec_info mulh_stmt_info;
3186 tree scale_term;
3187 bool rounding_p = false;
3188
3189 /* Check for the presence of the rounding term. */
3190 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
3191 {
3192 /* Check that the outer shift was by 1. */
3193 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
3194 return NULL;
3195
3196 /* Check that the second operand of the PLUS_EXPR is 1. */
3197 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
3198 return NULL;
3199
3200 /* Look through any change in sign on the addition input. */
3201 vect_unpromoted_value unprom_plus_input;
3202 tree plus_input = vect_look_through_possible_promotion
3203 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
3204 if (!plus_input
3205 || TYPE_PRECISION (TREE_TYPE (plus_input))
3206 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
3207 return NULL;
3208
3209 /* Get the definition of the multiply-high-scale part. */
3210 stmt_vec_info plus_input_stmt_info
3211 = vect_get_internal_def (vinfo, plus_input);
3212 if (!plus_input_stmt_info)
3213 return NULL;
3214 gassign *plus_input_stmt
3215 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
3216 if (!plus_input_stmt
3217 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
3218 return NULL;
3219
3220 /* Look through any change in sign on the scaling input. */
3221 vect_unpromoted_value unprom_scale_input;
3222 tree scale_input = vect_look_through_possible_promotion
3223 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
3224 if (!scale_input
3225 || TYPE_PRECISION (TREE_TYPE (scale_input))
3226 != TYPE_PRECISION (TREE_TYPE (plus_input)))
3227 return NULL;
3228
3229 /* Get the definition of the multiply-high part. */
3230 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
3231 if (!mulh_stmt_info)
3232 return NULL;
3233
3234 /* Get the scaling term. */
3235 scale_term = gimple_assign_rhs2 (plus_input_stmt);
3236 rounding_p = true;
3237 }
3238 else
3239 {
3240 mulh_stmt_info = rshift_input_stmt_info;
3241 scale_term = gimple_assign_rhs2 (last_stmt);
3242 }
3243
3244 /* Check that the scaling factor is constant. */
3245 if (TREE_CODE (scale_term) != INTEGER_CST)
3246 return NULL;
3247
3248 /* Check whether the scaling input term can be seen as two widened
3249 inputs multiplied together. */
3250 vect_unpromoted_value unprom_mult[2];
3251 tree new_type;
3252 unsigned int nops
3253 = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
3254 false, 2, unprom_mult, &new_type);
3255 if (nops != 2)
3256 return NULL;
3257
3258 /* Adjust output precision. */
3259 if (TYPE_PRECISION (new_type) < target_precision)
3260 new_type = build_nonstandard_integer_type
3261 (target_precision, TYPE_UNSIGNED (new_type));
3262
3263 unsigned mult_precision = TYPE_PRECISION (new_type);
3264 internal_fn ifn;
3265 /* Check that the scaling factor is as expected. Instead of
3266 target_precision, use the precision that we actually use
3267 for the internal function. */
3268 if (rounding_p)
3269 {
3270 /* Check pattern 2). */
3271 if (wi::to_widest (scale_term) + mult_precision + 2
3272 != TYPE_PRECISION (lhs_type))
3273 return NULL;
3274
3275 ifn = IFN_MULHRS;
3276 }
3277 else
3278 {
3279 /* Check for pattern 1). */
3280 if (wi::to_widest (scale_term) + mult_precision + 1
3281 == TYPE_PRECISION (lhs_type))
3282 ifn = IFN_MULHS;
3283 /* Check for pattern 3). */
3284 else if (wi::to_widest (scale_term) + mult_precision
3285 == TYPE_PRECISION (lhs_type))
3286 ifn = IFN_MULH;
3287 else
3288 return NULL;
3289 }
3290
3291 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
3292
3293 /* Check for target support. */
3294 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3295 if (!new_vectype
3296 || !direct_internal_fn_supported_p
3297 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3298 return NULL;
3299
3300 /* The IR requires a valid vector type for the cast result, even though
3301 it's likely to be discarded. */
3302 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3303 if (!*type_out)
3304 return NULL;
3305
3306 /* Generate the IFN_MULHRS call. */
3307 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
3308 tree new_ops[2];
3309 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
3310 unprom_mult, new_vectype);
3311 gcall *mulhrs_stmt
3312 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
3313 gimple_call_set_lhs (mulhrs_stmt, new_var);
3314 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
3315
3316 if (dump_enabled_p ())
3317 dump_printf_loc (MSG_NOTE, vect_location,
3318 "created pattern stmt: %G", (gimple *) mulhrs_stmt);
3319
3320 return vect_convert_output (vinfo, last_stmt_info, lhs_type,
3321 mulhrs_stmt, new_vectype);
3322 }
3323
3324 /* Recognize the patterns:
3325
3326 ATYPE a; // narrower than TYPE
3327 BTYPE b; // narrower than TYPE
3328 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
3329 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
3330
3331 where only the bottom half of avg is used. Try to transform them into:
3332
3333 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
3334 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
3335
3336 followed by:
3337
3338 TYPE avg = (TYPE) avg';
3339
3340 where NTYPE is no wider than half of TYPE. Since only the bottom half
3341 of avg is used, all or part of the cast of avg' should become redundant.
3342
3343 If there is no target support available, generate code to distribute rshift
3344 over plus and add a carry. */
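/* For example, with unsigned char a and b and a 32-bit TYPE, a = 250 and
b = 251 give avg = 250; the average of two N-bit values always fits in
N bits, so .AVG_FLOOR/.AVG_CEIL can operate directly on the narrow type
even though a + b would overflow it. */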
3345
3346 static gimple *
3347 vect_recog_average_pattern (vec_info *vinfo,
3348 stmt_vec_info last_stmt_info, tree *type_out)
3349 {
3350 /* Check for a shift right by one bit. */
3351 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3352 if (!last_stmt
3353 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
3354 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
3355 return NULL;
3356
3357 /* Check that the shift result is wider than the users of the
3358 result need (i.e. that narrowing would be a natural choice). */
3359 tree lhs = gimple_assign_lhs (last_stmt);
3360 tree type = TREE_TYPE (lhs);
3361 unsigned int target_precision
3362 = vect_element_precision (last_stmt_info->min_output_precision);
3363 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
3364 return NULL;
3365
3366 /* Look through any change in sign on the shift input. */
3367 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
3368 vect_unpromoted_value unprom_plus;
3369 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
3370 &unprom_plus);
3371 if (!rshift_rhs
3372 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
3373 return NULL;
3374
3375 /* Get the definition of the shift input. */
3376 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
3377 if (!plus_stmt_info)
3378 return NULL;
3379
3380 /* Check whether the shift input can be seen as a tree of additions on
3381 2 or 3 widened inputs.
3382
3383 Note that the pattern should be a win even if the result of one or
3384 more additions is reused elsewhere: if the pattern matches, we'd be
3385 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
3386 internal_fn ifn = IFN_AVG_FLOOR;
3387 vect_unpromoted_value unprom[3];
3388 tree new_type;
3389 unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
3390 IFN_VEC_WIDEN_PLUS, false, 3,
3391 unprom, &new_type);
3392 if (nops == 0)
3393 return NULL;
3394 if (nops == 3)
3395 {
3396 /* Check that one operand is 1. */
3397 unsigned int i;
3398 for (i = 0; i < 3; ++i)
3399 if (integer_onep (unprom[i].op))
3400 break;
3401 if (i == 3)
3402 return NULL;
3403 /* Throw away the 1 operand and keep the other two. */
3404 if (i < 2)
3405 unprom[i] = unprom[2];
3406 ifn = IFN_AVG_CEIL;
3407 }
3408
3409 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
3410
3411 /* We know that:
3412
3413 (a) the operation can be viewed as:
3414
3415 TYPE widened0 = (TYPE) UNPROM[0];
3416 TYPE widened1 = (TYPE) UNPROM[1];
3417 TYPE tmp1 = widened0 + widened1 {+ 1};
3418 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
3419
3420 (b) the first two statements are equivalent to:
3421
3422 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
3423 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
3424
3425 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
3426 where sensible;
3427
3428 (d) all the operations can be performed correctly at twice the width of
3429 NEW_TYPE, due to the nature of the average operation; and
3430
3431 (e) users of the result of the right shift need only TARGET_PRECISION
3432 bits, where TARGET_PRECISION is no more than half of TYPE's
3433 precision.
3434
3435 Under these circumstances, the only situation in which NEW_TYPE
3436 could be narrower than TARGET_PRECISION is if widened0, widened1
3437 and an addition result are all used more than once. Thus we can
3438 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
3439 as "free", whereas widening the result of the average instruction
3440 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
3441 therefore better not to go narrower than TARGET_PRECISION. */
3442 if (TYPE_PRECISION (new_type) < target_precision)
3443 new_type = build_nonstandard_integer_type (target_precision,
3444 TYPE_UNSIGNED (new_type));
3445
3446 /* Check for target support. */
3447 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3448 if (!new_vectype)
3449 return NULL;
3450
3451 bool fallback_p = false;
3452
3453 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3454 ;
3455 else if (TYPE_UNSIGNED (new_type)
3456 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
3457 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
3458 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
3459 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
3460 fallback_p = true;
3461 else
3462 return NULL;
3463
3464 /* The IR requires a valid vector type for the cast result, even though
3465 it's likely to be discarded. */
3466 *type_out = get_vectype_for_scalar_type (vinfo, type);
3467 if (!*type_out)
3468 return NULL;
3469
3470 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
3471 tree new_ops[2];
3472 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
3473 unprom, new_vectype);
3474
3475 if (fallback_p)
3476 {
3477 /* As a fallback, generate code for the following sequence:
3478
3479 shifted_op0 = new_ops[0] >> 1;
3480 shifted_op1 = new_ops[1] >> 1;
3481 sum_of_shifted = shifted_op0 + shifted_op1;
3482 unmasked_carry = new_ops[0] and/or new_ops[1];
3483 carry = unmasked_carry & 1;
3484 new_var = sum_of_shifted + carry;
3485 */
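/* For example (AVG_CEIL), with new_ops[0] = 5 and new_ops[1] = 6:
(5 >> 1) + (6 >> 1) = 5, carry = (5 | 6) & 1 = 1, so new_var = 6
== (5 + 6 + 1) >> 1. For AVG_FLOOR the carry uses 5 & 6 instead,
giving 5 == (5 + 6) >> 1. */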
3486
3487 tree one_cst = build_one_cst (new_type);
3488 gassign *g;
3489
3490 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
3491 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
3492 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3493
3494 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
3495 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
3496 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3497
3498 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
3499 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
3500 shifted_op0, shifted_op1);
3501 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3502
3503 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
3504 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
3505 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
3506 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3507
3508 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
3509 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
3510 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3511
3512 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
3513 return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
3514 }
3515
3516 /* Generate the IFN_AVG* call. */
3517 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
3518 new_ops[1]);
3519 gimple_call_set_lhs (average_stmt, new_var);
3520 gimple_set_location (average_stmt, gimple_location (last_stmt));
3521
3522 if (dump_enabled_p ())
3523 dump_printf_loc (MSG_NOTE, vect_location,
3524 "created pattern stmt: %G", (gimple *) average_stmt);
3525
3526 return vect_convert_output (vinfo, last_stmt_info,
3527 type, average_stmt, new_vectype);
3528 }
3529
3530 /* Recognize cases in which the input to a cast is wider than its
3531 output, and the input is fed by a widening operation. Fold this
3532 by removing the unnecessary intermediate widening. E.g.:
3533
3534 unsigned char a;
3535 unsigned int b = (unsigned int) a;
3536 unsigned short c = (unsigned short) b;
3537
3538 -->
3539
3540 unsigned short c = (unsigned short) a;
3541
3542 Although this is rare in input IR, it is an expected side-effect
3543 of the over-widening pattern above.
3544
3545 This is beneficial also for integer-to-float conversions, if the
3546 widened integer has more bits than the float, and if the unwidened
3547 input doesn't. */
3548
3549 static gimple *
3550 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
3551 stmt_vec_info last_stmt_info, tree *type_out)
3552 {
3553 /* Check for a cast, including an integer-to-float conversion. */
3554 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3555 if (!last_stmt)
3556 return NULL;
3557 tree_code code = gimple_assign_rhs_code (last_stmt);
3558 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
3559 return NULL;
3560
3561 /* Make sure that the rhs is a scalar with a natural bitsize. */
3562 tree lhs = gimple_assign_lhs (last_stmt);
3563 if (!lhs)
3564 return NULL;
3565 tree lhs_type = TREE_TYPE (lhs);
3566 scalar_mode lhs_mode;
3567 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
3568 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
3569 return NULL;
3570
3571 /* Check for a narrowing operation (from a vector point of view). */
3572 tree rhs = gimple_assign_rhs1 (last_stmt);
3573 tree rhs_type = TREE_TYPE (rhs);
3574 if (!INTEGRAL_TYPE_P (rhs_type)
3575 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
3576 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
3577 return NULL;
3578
3579 /* Try to find an unpromoted input. */
3580 vect_unpromoted_value unprom;
3581 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
3582 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
3583 return NULL;
3584
3585 /* If the bits above RHS_TYPE matter, make sure that they're the
3586 same when extending from UNPROM as they are when extending from RHS. */
3587 if (!INTEGRAL_TYPE_P (lhs_type)
3588 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
3589 return NULL;
3590
3591 /* We can get the same result by casting UNPROM directly, to avoid
3592 the unnecessary widening and narrowing. */
3593 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
3594
3595 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3596 if (!*type_out)
3597 return NULL;
3598
3599 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
3600 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
3601 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3602
3603 return pattern_stmt;
3604 }
3605
3606 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
3607 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
3608
3609 static gimple *
3610 vect_recog_widen_shift_pattern (vec_info *vinfo,
3611 stmt_vec_info last_stmt_info, tree *type_out)
3612 {
3613 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
3614 LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
3615 "vect_recog_widen_shift_pattern");
3616 }
3617
3618 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
3619
3620 type a_t, b_t, c_t;
3621
3622 S0 a_t = b_t r<< c_t;
3623
3624 Input/Output:
3625
3626 * STMT_VINFO: The stmt from which the pattern search begins,
3627 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
3628 with a sequence:
3629
3630 S1 d_t = -c_t;
3631 S2 e_t = d_t & (B - 1);
3632 S3 f_t = b_t << c_t;
3633 S4 g_t = b_t >> e_t;
3634 S0 a_t = f_t | g_t;
3635
3636 where B is element bitsize of type.
3637
3638 Output:
3639
3640 * TYPE_OUT: The type of the output of this pattern.
3641
3642 * Return value: A new stmt that will be used to replace the rotate
3643 S0 stmt. */
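/* For example, for a 32-bit b_t rotated left by c_t = 8:
d_t = -8, e_t = -8 & 31 = 24, and a_t = (b_t << 8) | (b_t >> 24). */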
3644
3645 static gimple *
3646 vect_recog_rotate_pattern (vec_info *vinfo,
3647 stmt_vec_info stmt_vinfo, tree *type_out)
3648 {
3649 gimple *last_stmt = stmt_vinfo->stmt;
3650 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
3651 gimple *pattern_stmt, *def_stmt;
3652 enum tree_code rhs_code;
3653 enum vect_def_type dt;
3654 optab optab1, optab2;
3655 edge ext_def = NULL;
3656 bool bswap16_p = false;
3657
3658 if (is_gimple_assign (last_stmt))
3659 {
3660 rhs_code = gimple_assign_rhs_code (last_stmt);
3661 switch (rhs_code)
3662 {
3663 case LROTATE_EXPR:
3664 case RROTATE_EXPR:
3665 break;
3666 default:
3667 return NULL;
3668 }
3669
3670 lhs = gimple_assign_lhs (last_stmt);
3671 oprnd0 = gimple_assign_rhs1 (last_stmt);
3672 type = TREE_TYPE (oprnd0);
3673 oprnd1 = gimple_assign_rhs2 (last_stmt);
3674 }
3675 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
3676 {
3677 /* __builtin_bswap16 (x) is another form of x r>> 8.
3678 The vectorizer has bswap support, but only if the argument isn't
3679 promoted. */
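/* E.g. __builtin_bswap16 (0xABCD) == 0xCDAB, which is 0xABCD r>> 8
when computed in 16 bits. */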
3680 lhs = gimple_call_lhs (last_stmt);
3681 oprnd0 = gimple_call_arg (last_stmt, 0);
3682 type = TREE_TYPE (oprnd0);
3683 if (!lhs
3684 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
3685 || TYPE_PRECISION (type) <= 16
3686 || TREE_CODE (oprnd0) != SSA_NAME
3687 || BITS_PER_UNIT != 8)
3688 return NULL;
3689
3690 stmt_vec_info def_stmt_info;
3691 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
3692 return NULL;
3693
3694 if (dt != vect_internal_def)
3695 return NULL;
3696
3697 if (gimple_assign_cast_p (def_stmt))
3698 {
3699 def = gimple_assign_rhs1 (def_stmt);
3700 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
3701 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
3702 oprnd0 = def;
3703 }
3704
3705 type = TREE_TYPE (lhs);
3706 vectype = get_vectype_for_scalar_type (vinfo, type);
3707 if (vectype == NULL_TREE)
3708 return NULL;
3709
3710 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
3711 {
3712 /* The encoding uses one stepped pattern for each byte in the
3713 16-bit word. */
3714 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
3715 for (unsigned i = 0; i < 3; ++i)
3716 for (unsigned j = 0; j < 2; ++j)
3717 elts.quick_push ((i + 1) * 2 - j - 1);
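/* This pushes the series { 1, 0, 3, 2, 5, 4 }, which the stepped
encoding extends to swap the two bytes within every 16-bit element. */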
3718
3719 vec_perm_indices indices (elts, 1,
3720 TYPE_VECTOR_SUBPARTS (char_vectype));
3721 machine_mode vmode = TYPE_MODE (char_vectype);
3722 if (can_vec_perm_const_p (vmode, vmode, indices))
3723 {
3724 /* vectorizable_bswap can handle the __builtin_bswap16 if we
3725 undo the argument promotion. */
3726 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3727 {
3728 def = vect_recog_temp_ssa_var (type, NULL);
3729 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3730 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3731 oprnd0 = def;
3732 }
3733
3734 /* Pattern detected. */
3735 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3736
3737 *type_out = vectype;
3738
3739 /* Pattern supported. Create a stmt to be used to replace the
3740 pattern, with the unpromoted argument. */
3741 var = vect_recog_temp_ssa_var (type, NULL);
3742 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
3743 1, oprnd0);
3744 gimple_call_set_lhs (pattern_stmt, var);
3745 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
3746 gimple_call_fntype (last_stmt));
3747 return pattern_stmt;
3748 }
3749 }
3750
3751 oprnd1 = build_int_cst (integer_type_node, 8);
3752 rhs_code = LROTATE_EXPR;
3753 bswap16_p = true;
3754 }
3755 else
3756 return NULL;
3757
3758 if (TREE_CODE (oprnd0) != SSA_NAME
3759 || !INTEGRAL_TYPE_P (type)
3760 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type))
3761 return NULL;
3762
3763 stmt_vec_info def_stmt_info;
3764 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
3765 return NULL;
3766
3767 if (dt != vect_internal_def
3768 && dt != vect_constant_def
3769 && dt != vect_external_def)
3770 return NULL;
3771
3772 vectype = get_vectype_for_scalar_type (vinfo, type);
3773 if (vectype == NULL_TREE)
3774 return NULL;
3775
3776 /* If vector/vector or vector/scalar rotate is supported by the target,
3777 don't do anything here. */
3778 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
3779 if (optab1
3780 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3781 {
3782 use_rotate:
3783 if (bswap16_p)
3784 {
3785 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3786 {
3787 def = vect_recog_temp_ssa_var (type, NULL);
3788 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3789 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3790 oprnd0 = def;
3791 }
3792
3793 /* Pattern detected. */
3794 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3795
3796 *type_out = vectype;
3797
3798 /* Pattern supported. Create a stmt to be used to replace the
3799 pattern. */
3800 var = vect_recog_temp_ssa_var (type, NULL);
3801 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
3802 oprnd1);
3803 return pattern_stmt;
3804 }
3805 return NULL;
3806 }
3807
3808 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
3809 {
3810 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
3811 if (optab2
3812 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3813 goto use_rotate;
3814 }
3815
3816 tree utype = unsigned_type_for (type);
3817 tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
3818 if (!uvectype)
3819 return NULL;
3820
3821 /* If vector/vector or vector/scalar shifts aren't supported by the target,
3822 don't do anything here either. */
3823 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
3824 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
3825 if (!optab1
3826 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3827 || !optab2
3828 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3829 {
3830 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
3831 return NULL;
3832 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
3833 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
3834 if (!optab1
3835 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3836 || !optab2
3837 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3838 return NULL;
3839 }
3840
3841 *type_out = vectype;
3842
3843 if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
3844 {
3845 def = vect_recog_temp_ssa_var (utype, NULL);
3846 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3847 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3848 oprnd0 = def;
3849 }
3850
3851 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
3852 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
3853
3854 def = NULL_TREE;
3855 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
3856 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
3857 def = oprnd1;
3858 else if (def_stmt && gimple_assign_cast_p (def_stmt))
3859 {
3860 tree rhs1 = gimple_assign_rhs1 (def_stmt);
3861 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
3862 && TYPE_PRECISION (TREE_TYPE (rhs1))
3863 == TYPE_PRECISION (type))
3864 def = rhs1;
3865 }
3866
3867 if (def == NULL_TREE)
3868 {
3869 def = vect_recog_temp_ssa_var (utype, NULL);
3870 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
3871 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3872 }
3873 stype = TREE_TYPE (def);
3874
3875 if (TREE_CODE (def) == INTEGER_CST)
3876 {
3877 if (!tree_fits_uhwi_p (def)
3878 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
3879 || integer_zerop (def))
3880 return NULL;
3881 def2 = build_int_cst (stype,
3882 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
3883 }
3884 else
3885 {
3886 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
3887
3888 if (vecstype == NULL_TREE)
3889 return NULL;
3890 def2 = vect_recog_temp_ssa_var (stype, NULL);
3891 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
3892 if (ext_def)
3893 {
3894 basic_block new_bb
3895 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3896 gcc_assert (!new_bb);
3897 }
3898 else
3899 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3900
3901 def2 = vect_recog_temp_ssa_var (stype, NULL);
3902 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
3903 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
3904 gimple_assign_lhs (def_stmt), mask);
3905 if (ext_def)
3906 {
3907 basic_block new_bb
3908 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3909 gcc_assert (!new_bb);
3910 }
3911 else
3912 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3913 }
3914
3915 var1 = vect_recog_temp_ssa_var (utype, NULL);
3916 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
3917 ? LSHIFT_EXPR : RSHIFT_EXPR,
3918 oprnd0, def);
3919 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3920
3921 var2 = vect_recog_temp_ssa_var (utype, NULL);
3922 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
3923 ? RSHIFT_EXPR : LSHIFT_EXPR,
3924 oprnd0, def2);
3925 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3926
3927 /* Pattern detected. */
3928 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3929
3930 /* Pattern supported. Create a stmt to be used to replace the pattern. */
3931 var = vect_recog_temp_ssa_var (utype, NULL);
3932 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
3933
3934 if (!useless_type_conversion_p (type, utype))
3935 {
3936 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, uvectype);
3937 tree result = vect_recog_temp_ssa_var (type, NULL);
3938 pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
3939 }
3940 return pattern_stmt;
3941 }
3942
3943 /* Detect a vector-by-vector shift pattern that wouldn't otherwise be
3944 vectorized:
3945
3946 type a_t;
3947 TYPE b_T, res_T;
3948
3949 S1 a_t = ;
3950 S2 b_T = ;
3951 S3 res_T = b_T op a_t;
3952
3953 where type 'TYPE' is a type with a different size than 'type',
3954 and op is <<, >> or rotate.
3955
3956 Also detect cases:
3957
3958 type a_t;
3959 TYPE b_T, c_T, res_T;
3960
3961 S0 c_T = ;
3962 S1 a_t = (type) c_T;
3963 S2 b_T = ;
3964 S3 res_T = b_T op a_t;
3965
3966 Input/Output:
3967
3968 * STMT_VINFO: The stmt from which the pattern search begins,
3969 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
3970 with a shift/rotate which has the same type on both operands, in the
3971 second case just b_T op c_T, in the first case with an added cast
3972 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
3973
3974 Output:
3975
3976 * TYPE_OUT: The type of the output of this pattern.
3977
3978 * Return value: A new stmt that will be used to replace the shift/rotate
3979 S3 stmt. */
3980
3981 static gimple *
3982 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
3983 stmt_vec_info stmt_vinfo,
3984 tree *type_out)
3985 {
3986 gimple *last_stmt = stmt_vinfo->stmt;
3987 tree oprnd0, oprnd1, lhs, var;
3988 gimple *pattern_stmt;
3989 enum tree_code rhs_code;
3990
3991 if (!is_gimple_assign (last_stmt))
3992 return NULL;
3993
3994 rhs_code = gimple_assign_rhs_code (last_stmt);
3995 switch (rhs_code)
3996 {
3997 case LSHIFT_EXPR:
3998 case RSHIFT_EXPR:
3999 case LROTATE_EXPR:
4000 case RROTATE_EXPR:
4001 break;
4002 default:
4003 return NULL;
4004 }
4005
4006 lhs = gimple_assign_lhs (last_stmt);
4007 oprnd0 = gimple_assign_rhs1 (last_stmt);
4008 oprnd1 = gimple_assign_rhs2 (last_stmt);
4009 if (TREE_CODE (oprnd0) != SSA_NAME
4010 || TREE_CODE (oprnd1) != SSA_NAME
4011 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
4012 || !INTEGRAL_TYPE_P (TREE_TYPE (oprnd0))
4013 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
4014 || TYPE_PRECISION (TREE_TYPE (lhs))
4015 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
4016 return NULL;
4017
4018 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
4019 if (!def_vinfo)
4020 return NULL;
4021
4022 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
4023 if (*type_out == NULL_TREE)
4024 return NULL;
4025
4026 tree def = NULL_TREE;
4027 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
4028 if (def_stmt && gimple_assign_cast_p (def_stmt))
4029 {
4030 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4031 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
4032 && TYPE_PRECISION (TREE_TYPE (rhs1))
4033 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
4034 {
4035 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
4036 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
4037 def = rhs1;
4038 else
4039 {
4040 tree mask
4041 = build_low_bits_mask (TREE_TYPE (rhs1),
4042 TYPE_PRECISION (TREE_TYPE (oprnd1)));
4043 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4044 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
4045 tree vecstype = get_vectype_for_scalar_type (vinfo,
4046 TREE_TYPE (rhs1));
4047 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
4048 }
4049 }
4050 }
4051
4052 if (def == NULL_TREE)
4053 {
4054 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4055 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
4056 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4057 }
4058
4059 /* Pattern detected. */
4060 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
4061
4062 /* Pattern supported. Create a stmt to be used to replace the pattern. */
4063 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4064 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
4065
4066 return pattern_stmt;
4067 }
4068
4069 /* Return true iff the target has a vector optab implementing the operation
4070 CODE on type VECTYPE. */
4071
4072 static bool
4073 target_has_vecop_for_code (tree_code code, tree vectype)
4074 {
4075 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
4076 return voptab
4077 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
4078 }
4079
4080 /* Verify that the target has optabs of VECTYPE to perform all the steps
4081 needed by the multiplication-by-immediate synthesis algorithm described by
4082 ALG and VAR. If SYNTH_SHIFT_P is true, ensure that vector addition is
4083 present. Return true iff the target supports all the steps. */
4084
4085 static bool
4086 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
4087 tree vectype, bool synth_shift_p)
4088 {
4089 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
4090 return false;
4091
4092 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
4093 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
4094
4095 if (var == negate_variant
4096 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
4097 return false;
4098
4099 /* If we must synthesize shifts with additions, make sure that vector
4100 addition is available. */
4101 if ((var == add_variant || synth_shift_p) && !supports_vplus)
4102 return false;
4103
4104 for (int i = 1; i < alg->ops; i++)
4105 {
4106 switch (alg->op[i])
4107 {
4108 case alg_shift:
4109 break;
4110 case alg_add_t_m2:
4111 case alg_add_t2_m:
4112 case alg_add_factor:
4113 if (!supports_vplus)
4114 return false;
4115 break;
4116 case alg_sub_t_m2:
4117 case alg_sub_t2_m:
4118 case alg_sub_factor:
4119 if (!supports_vminus)
4120 return false;
4121 break;
4122 case alg_unknown:
4123 case alg_m:
4124 case alg_zero:
4125 case alg_impossible:
4126 return false;
4127 default:
4128 gcc_unreachable ();
4129 }
4130 }
4131
4132 return true;
4133 }
4134
4135 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
4136 putting the final result in DEST. Append all statements but the last to
4137 the pattern definition sequence of STMT_INFO. Return the last statement. */
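/* E.g. a shift left by 2 becomes two self-additions:
t1 = op + op; dest = t1 + t1; */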
4138
4139 static gimple *
4140 synth_lshift_by_additions (vec_info *vinfo,
4141 tree dest, tree op, HOST_WIDE_INT amnt,
4142 stmt_vec_info stmt_info)
4143 {
4144 HOST_WIDE_INT i;
4145 tree itype = TREE_TYPE (op);
4146 tree prev_res = op;
4147 gcc_assert (amnt >= 0);
4148 for (i = 0; i < amnt; i++)
4149 {
4150 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
4151 : dest;
4152 gimple *stmt
4153 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
4154 prev_res = tmp_var;
4155 if (i < amnt - 1)
4156 append_pattern_def_seq (vinfo, stmt_info, stmt);
4157 else
4158 return stmt;
4159 }
4160 gcc_unreachable ();
4161 return NULL;
4162 }
4163
4164 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
4165 CODE to operands OP1 and OP2, creating a new temporary SSA var in
4166 the process if necessary. Append the resulting assignment statements
4167 to the sequence in STMT_VINFO. Return the SSA variable that holds the
4168 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
4169 left shifts using additions. */
4170
4171 static tree
4172 apply_binop_and_append_stmt (vec_info *vinfo,
4173 tree_code code, tree op1, tree op2,
4174 stmt_vec_info stmt_vinfo, bool synth_shift_p)
4175 {
4176 if (integer_zerop (op2)
4177 && (code == LSHIFT_EXPR
4178 || code == PLUS_EXPR))
4179 {
4180 gcc_assert (TREE_CODE (op1) == SSA_NAME);
4181 return op1;
4182 }
4183
4184 gimple *stmt;
4185 tree itype = TREE_TYPE (op1);
4186 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
4187
4188 if (code == LSHIFT_EXPR
4189 && synth_shift_p)
4190 {
4191 stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
4192 TREE_INT_CST_LOW (op2), stmt_vinfo);
4193 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4194 return tmp_var;
4195 }
4196
4197 stmt = gimple_build_assign (tmp_var, code, op1, op2);
4198 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4199 return tmp_var;
4200 }
4201
4202 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
4203 and simple arithmetic operations to be vectorized. Record the statements
4204 produced in STMT_VINFO and return the last statement in the sequence or
4205 NULL if it's not possible to synthesize such a multiplication.
4206 This function mirrors the behavior of expand_mult_const in expmed.cc but
4207 works on tree-ssa form. */
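/* For illustration, OP * 11 can be synthesized with shifts and adds as,
e.g., t1 = op << 3; t2 = op << 1; t3 = t1 + t2; result = t3 + op;
the actual sequence is the one chosen by choose_mult_variant. */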
4208
4209 static gimple *
4210 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
4211 stmt_vec_info stmt_vinfo)
4212 {
4213 tree itype = TREE_TYPE (op);
4214 machine_mode mode = TYPE_MODE (itype);
4215 struct algorithm alg;
4216 mult_variant variant;
4217 if (!tree_fits_shwi_p (val))
4218 return NULL;
4219
4220 /* Multiplication synthesis by shifts, adds and subs can introduce
4221 signed overflow where the original operation didn't. Perform the
4222 operations on an unsigned type and cast back to avoid this.
4223 In the future we may want to relax this for synthesis algorithms
4224 that we can prove do not cause unexpected overflow. */
4225 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
4226
4227 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
4228 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
4229 if (!vectype)
4230 return NULL;
4231
4232 /* Targets that don't support vector shifts but support vector additions
4233 can synthesize shifts that way. */
4234 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
4235
4236 HOST_WIDE_INT hwval = tree_to_shwi (val);
4237 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
4238 The vectorizer's benefit analysis will decide whether it's beneficial
4239 to do this. */
4240 bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
4241 ? TYPE_MODE (vectype) : mode,
4242 hwval, &alg, &variant, MAX_COST);
4243 if (!possible)
4244 return NULL;
4245
4246 if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
4247 return NULL;
4248
4249 tree accumulator;
4250
4251 /* Clear out the sequence of statements so we can populate it below. */
4252 gimple *stmt = NULL;
4253
4254 if (cast_to_unsigned_p)
4255 {
4256 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
4257 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
4258 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4259 op = tmp_op;
4260 }
4261
4262 if (alg.op[0] == alg_zero)
4263 accumulator = build_int_cst (multtype, 0);
4264 else
4265 accumulator = op;
4266
4267 bool needs_fixup = (variant == negate_variant)
4268 || (variant == add_variant);
4269
4270 for (int i = 1; i < alg.ops; i++)
4271 {
4272 tree shft_log = build_int_cst (multtype, alg.log[i]);
4273 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4274 tree tmp_var = NULL_TREE;
4275
4276 switch (alg.op[i])
4277 {
4278 case alg_shift:
4279 if (synth_shift_p)
4280 stmt
4281 = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
4282 alg.log[i], stmt_vinfo);
4283 else
4284 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
4285 shft_log);
4286 break;
4287 case alg_add_t_m2:
4288 tmp_var
4289 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
4290 stmt_vinfo, synth_shift_p);
4291 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4292 tmp_var);
4293 break;
4294 case alg_sub_t_m2:
4295 tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
4296 shft_log, stmt_vinfo,
4297 synth_shift_p);
4298 /* In some algorithms the first step involves zeroing the
4299 accumulator. If subtracting from such an accumulator
4300 just emit the negation directly. */
4301 if (integer_zerop (accumulator))
4302 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
4303 else
4304 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
4305 tmp_var);
4306 break;
4307 case alg_add_t2_m:
4308 tmp_var
4309 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4310 shft_log, stmt_vinfo, synth_shift_p);
4311 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
4312 break;
4313 case alg_sub_t2_m:
4314 tmp_var
4315 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4316 shft_log, stmt_vinfo, synth_shift_p);
4317 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
4318 break;
4319 case alg_add_factor:
4320 tmp_var
4321 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4322 shft_log, stmt_vinfo, synth_shift_p);
4323 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4324 tmp_var);
4325 break;
4326 case alg_sub_factor:
4327 tmp_var
4328 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4329 shft_log, stmt_vinfo, synth_shift_p);
4330 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
4331 accumulator);
4332 break;
4333 default:
4334 gcc_unreachable ();
4335 }
4336 /* We don't want to append the last stmt in the sequence to stmt_vinfo
4337 but rather return it directly. */
4338
4339 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
4340 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4341 accumulator = accum_tmp;
4342 }
4343 if (variant == negate_variant)
4344 {
4345 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4346 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
4347 accumulator = accum_tmp;
4348 if (cast_to_unsigned_p)
4349 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4350 }
4351 else if (variant == add_variant)
4352 {
4353 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4354 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
4355 accumulator = accum_tmp;
4356 if (cast_to_unsigned_p)
4357 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4358 }
4359 /* Move back to a signed type if needed. */
4360 if (cast_to_unsigned_p)
4361 {
4362 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
4363 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
4364 }
4365
4366 return stmt;
4367 }
4368
4369 /* Detect multiplication by constant and convert it into a sequence of
4370 shifts and additions, subtractions, negations. We reuse the
4371 choose_mult_variant algorithms from expmed.cc
4372
4373 Input/Output:
4374
4375 STMT_VINFO: The stmt from which the pattern search begins,
4376 i.e. the mult stmt.
4377
4378 Output:
4379
4380 * TYPE_OUT: The type of the output of this pattern.
4381
4382 * Return value: A new stmt that will be used to replace
4383 the multiplication. */
4384
4385 static gimple *
4386 vect_recog_mult_pattern (vec_info *vinfo,
4387 stmt_vec_info stmt_vinfo, tree *type_out)
4388 {
4389 gimple *last_stmt = stmt_vinfo->stmt;
4390 tree oprnd0, oprnd1, vectype, itype;
4391 gimple *pattern_stmt;
4392
4393 if (!is_gimple_assign (last_stmt))
4394 return NULL;
4395
4396 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
4397 return NULL;
4398
4399 oprnd0 = gimple_assign_rhs1 (last_stmt);
4400 oprnd1 = gimple_assign_rhs2 (last_stmt);
4401 itype = TREE_TYPE (oprnd0);
4402
4403 if (TREE_CODE (oprnd0) != SSA_NAME
4404 || TREE_CODE (oprnd1) != INTEGER_CST
4405 || !INTEGRAL_TYPE_P (itype)
4406 || !type_has_mode_precision_p (itype))
4407 return NULL;
4408
4409 vectype = get_vectype_for_scalar_type (vinfo, itype);
4410 if (vectype == NULL_TREE)
4411 return NULL;
4412
4413 /* If the target can handle vectorized multiplication natively,
4414 don't attempt to optimize this. */
4415 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
4416 if (mul_optab != unknown_optab)
4417 {
4418 machine_mode vec_mode = TYPE_MODE (vectype);
4419 int icode = (int) optab_handler (mul_optab, vec_mode);
4420 if (icode != CODE_FOR_nothing)
4421 return NULL;
4422 }
4423
4424 pattern_stmt = vect_synth_mult_by_constant (vinfo,
4425 oprnd0, oprnd1, stmt_vinfo);
4426 if (!pattern_stmt)
4427 return NULL;
4428
4429 /* Pattern detected. */
4430 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
4431
4432 *type_out = vectype;
4433
4434 return pattern_stmt;
4435 }
4436
4437 /* Detect a signed division by a constant that wouldn't otherwise be
4438 vectorized:
4439
4440 type a_t, b_t;
4441
4442 S1 a_t = b_t / N;
4443
4444 where type 'type' is an integral type and N is a constant.
4445
4446 Similarly handle modulo by a constant:
4447
4448 S4 a_t = b_t % N;
4449
4450 Input/Output:
4451
4452 * STMT_VINFO: The stmt from which the pattern search begins,
4453 i.e. the division stmt. S1 is replaced, if N is a power
4454 of two constant and type is signed, by:
4455 S3 y_t = b_t < 0 ? N - 1 : 0;
4456 S2 x_t = b_t + y_t;
4457 S1' a_t = x_t >> log2 (N);
4458
4459 S4 is replaced, if N is a power of two constant and type is
4460 signed, by the following (where *_T temporaries have unsigned type):
4461 S9 y_T = b_t < 0 ? -1U : 0U;
4462 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
4463 S7 z_t = (type) z_T;
4464 S6 w_t = b_t + z_t;
4465 S5 x_t = w_t & (N - 1);
4466 S4' a_t = x_t - z_t;
4467
4468 Output:
4469
4470 * TYPE_OUT: The type of the output of this pattern.
4471
4472 * Return value: A new stmt that will be used to replace the division
4473 S1 or modulo S4 stmt. */
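/* For example, for type int, N = 4 and b_t = -7:
S3 y_t = 3; S2 x_t = -4; S1' a_t = -4 >> 2 = -1 == -7 / 4. */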
4474
4475 static gimple *
4476 vect_recog_divmod_pattern (vec_info *vinfo,
4477 stmt_vec_info stmt_vinfo, tree *type_out)
4478 {
4479 gimple *last_stmt = stmt_vinfo->stmt;
4480 tree oprnd0, oprnd1, vectype, itype, cond;
4481 gimple *pattern_stmt, *def_stmt;
4482 enum tree_code rhs_code;
4483 optab optab;
4484 tree q, cst;
4485 int dummy_int, prec;
4486
4487 if (!is_gimple_assign (last_stmt))
4488 return NULL;
4489
4490 rhs_code = gimple_assign_rhs_code (last_stmt);
4491 switch (rhs_code)
4492 {
4493 case TRUNC_DIV_EXPR:
4494 case EXACT_DIV_EXPR:
4495 case TRUNC_MOD_EXPR:
4496 break;
4497 default:
4498 return NULL;
4499 }
4500
4501 oprnd0 = gimple_assign_rhs1 (last_stmt);
4502 oprnd1 = gimple_assign_rhs2 (last_stmt);
4503 itype = TREE_TYPE (oprnd0);
4504 if (TREE_CODE (oprnd0) != SSA_NAME
4505 || TREE_CODE (oprnd1) != INTEGER_CST
4506 || TREE_CODE (itype) != INTEGER_TYPE
4507 || !type_has_mode_precision_p (itype))
4508 return NULL;
4509
4510 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
4511 vectype = get_vectype_for_scalar_type (vinfo, itype);
4512 if (vectype == NULL_TREE)
4513 return NULL;
4514
4515 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
4516 {
4517 /* If the target can handle vectorized division or modulo natively,
4518 don't attempt to optimize this, since native division is likely
4519 to give smaller code. */
4520 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
4521 if (optab != unknown_optab)
4522 {
4523 machine_mode vec_mode = TYPE_MODE (vectype);
4524 int icode = (int) optab_handler (optab, vec_mode);
4525 if (icode != CODE_FOR_nothing)
4526 return NULL;
4527 }
4528 }
4529
4530 prec = TYPE_PRECISION (itype);
4531 if (integer_pow2p (oprnd1))
4532 {
4533 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
4534 return NULL;
4535
4536 /* Pattern detected. */
4537 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
4538
4539 *type_out = vectype;
4540
4541 /* Check if the target supports this internal function. */
4542 internal_fn ifn = IFN_DIV_POW2;
4543 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
4544 {
4545 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
4546
4547 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
4548 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
4549 gimple_call_set_lhs (div_stmt, var_div);
4550
4551 if (rhs_code == TRUNC_MOD_EXPR)
4552 {
4553 append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
4554 def_stmt
4555 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4556 LSHIFT_EXPR, var_div, shift);
4557 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4558 pattern_stmt
4559 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4560 MINUS_EXPR, oprnd0,
4561 gimple_assign_lhs (def_stmt));
4562 }
4563 else
4564 pattern_stmt = div_stmt;
4565 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
4566
4567 return pattern_stmt;
4568 }
4569
4570 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
4571 build_int_cst (itype, 0));
4572 if (rhs_code == TRUNC_DIV_EXPR
4573 || rhs_code == EXACT_DIV_EXPR)
4574 {
4575 tree var = vect_recog_temp_ssa_var (itype, NULL);
4576 tree shift;
4577 def_stmt
4578 = gimple_build_assign (var, COND_EXPR, cond,
4579 fold_build2 (MINUS_EXPR, itype, oprnd1,
4580 build_int_cst (itype, 1)),
4581 build_int_cst (itype, 0));
4582 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4583 var = vect_recog_temp_ssa_var (itype, NULL);
4584 def_stmt
4585 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
4586 gimple_assign_lhs (def_stmt));
4587 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4588
4589 shift = build_int_cst (itype, tree_log2 (oprnd1));
4590 pattern_stmt
4591 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4592 RSHIFT_EXPR, var, shift);
4593 }
4594 else
4595 {
4596 tree signmask;
4597 if (compare_tree_int (oprnd1, 2) == 0)
4598 {
4599 signmask = vect_recog_temp_ssa_var (itype, NULL);
4600 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
4601 build_int_cst (itype, 1),
4602 build_int_cst (itype, 0));
4603 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4604 }
4605 else
4606 {
4607 tree utype
4608 = build_nonstandard_integer_type (prec, 1);
4609 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
4610 tree shift
4611 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
4612 - tree_log2 (oprnd1));
4613 tree var = vect_recog_temp_ssa_var (utype, NULL);
4614
4615 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
4616 build_int_cst (utype, -1),
4617 build_int_cst (utype, 0));
4618 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
4619 var = vect_recog_temp_ssa_var (utype, NULL);
4620 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
4621 gimple_assign_lhs (def_stmt),
4622 shift);
4623 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
4624 signmask = vect_recog_temp_ssa_var (itype, NULL);
4625 def_stmt
4626 = gimple_build_assign (signmask, NOP_EXPR, var);
4627 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4628 }
4629 def_stmt
4630 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4631 PLUS_EXPR, oprnd0, signmask);
4632 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4633 def_stmt
4634 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4635 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
4636 fold_build2 (MINUS_EXPR, itype, oprnd1,
4637 build_int_cst (itype, 1)));
4638 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4639
4640 pattern_stmt
4641 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4642 MINUS_EXPR, gimple_assign_lhs (def_stmt),
4643 signmask);
4644 }
4645
4646 return pattern_stmt;
4647 }
4648
4649 if ((cst = uniform_integer_cst_p (oprnd1))
4650 && TYPE_UNSIGNED (itype)
4651 && rhs_code == TRUNC_DIV_EXPR
4652 && vectype
4653 && targetm.vectorize.preferred_div_as_shifts_over_mult (vectype))
4654 {
4655 /* We can use the relationship:
4656
4657 x // N == ((x+N+2) // (N+1) + x) // (N+1) for 0 <= x < N(N+3)
4658
4659 to optimize cases where N+1 is a power of 2, and where // (N+1)
4660 is therefore a shift right. When operating in modes that are
4661 multiples of a byte in size, there are two cases:
4662
4663 (1) N(N+3) is not representable, in which case the question
4664 becomes whether the replacement expression overflows.
4665 It is enough to test that x+N+2 does not overflow,
4666 i.e. that x < MAX-(N+1).
4667
4668 (2) N(N+3) is representable, in which case it is the (only)
4669 bound that we need to check.
4670
4671 ??? For now we just handle the case where // (N+1) is a shift
4672 right by half the precision, since some architectures can
4673 optimize the associated addition and shift combinations
4674 into single instructions. */
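/* For example, with a 16-bit type and N = 255 (N + 1 == 256 == 1 << 8):
x = 510: (510 + 257) >> 8 == 2, (2 + 510) >> 8 == 2 == 510 / 255;
x = 65535: (65535 + 257) >> 8 == 257, (257 + 65535) >> 8 == 257
== 65535 / 255. */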
4675
4676 auto wcst = wi::to_wide (cst);
4677 int pow = wi::exact_log2 (wcst + 1);
4678 if (pow == prec / 2)
4679 {
4680 gimple *stmt = SSA_NAME_DEF_STMT (oprnd0);
4681
4682 gimple_ranger ranger;
4683 int_range_max r;
4684
4685 /* Check that no overflow will occur. If we don't have range
4686 information we can't perform the optimization. */
4687
4688 if (ranger.range_of_expr (r, oprnd0, stmt) && !r.undefined_p ())
4689 {
4690 wide_int max = r.upper_bound ();
4691 wide_int one = wi::shwi (1, prec);
4692 wide_int adder = wi::add (one, wi::lshift (one, pow));
4693 wi::overflow_type ovf;
4694 wi::add (max, adder, UNSIGNED, &ovf);
4695 if (ovf == wi::OVF_NONE)
4696 {
4697 *type_out = vectype;
4698 tree tadder = wide_int_to_tree (itype, adder);
4699 tree rshift = wide_int_to_tree (itype, pow);
4700
4701 tree new_lhs1 = vect_recog_temp_ssa_var (itype, NULL);
4702 gassign *patt1
4703 = gimple_build_assign (new_lhs1, PLUS_EXPR, oprnd0, tadder);
4704 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
4705
4706 tree new_lhs2 = vect_recog_temp_ssa_var (itype, NULL);
4707 patt1 = gimple_build_assign (new_lhs2, RSHIFT_EXPR, new_lhs1,
4708 rshift);
4709 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
4710
4711 tree new_lhs3 = vect_recog_temp_ssa_var (itype, NULL);
4712 patt1 = gimple_build_assign (new_lhs3, PLUS_EXPR, new_lhs2,
4713 oprnd0);
4714 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
4715
4716 tree new_lhs4 = vect_recog_temp_ssa_var (itype, NULL);
4717 pattern_stmt = gimple_build_assign (new_lhs4, RSHIFT_EXPR,
4718 new_lhs3, rshift);
4719
4720 return pattern_stmt;
4721 }
4722 }
4723 }
4724 }
4725
4726 if (prec > HOST_BITS_PER_WIDE_INT
4727 || integer_zerop (oprnd1))
4728 return NULL;
4729
4730 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
4731 return NULL;
4732
4733 if (TYPE_UNSIGNED (itype))
4734 {
4735 unsigned HOST_WIDE_INT mh, ml;
4736 int pre_shift, post_shift;
4737 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
4738 & GET_MODE_MASK (itype_mode));
4739 tree t1, t2, t3, t4;
4740
4741 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
4742 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
4743 return NULL;
4744
4745 /* Find a suitable multiplier and right shift count
4746 instead of multiplying by D. */
4747 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
4748
4749 /* If the suggested multiplier is more than PREC bits, we can do better
4750 for even divisors, using an initial right shift. */
4751 if (mh != 0 && (d & 1) == 0)
4752 {
4753 pre_shift = ctz_or_zero (d);
4754 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
4755 &ml, &post_shift, &dummy_int);
4756 gcc_assert (!mh);
4757 }
4758 else
4759 pre_shift = 0;
4760
4761 if (mh != 0)
4762 {
4763 if (post_shift - 1 >= prec)
4764 return NULL;
4765
4766 /* t1 = oprnd0 h* ml;
4767 t2 = oprnd0 - t1;
4768 t3 = t2 >> 1;
4769 t4 = t1 + t3;
4770 q = t4 >> (post_shift - 1); */
4771 t1 = vect_recog_temp_ssa_var (itype, NULL);
4772 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
4773 build_int_cst (itype, ml));
4774 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4775
4776 t2 = vect_recog_temp_ssa_var (itype, NULL);
4777 def_stmt
4778 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
4779 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4780
4781 t3 = vect_recog_temp_ssa_var (itype, NULL);
4782 def_stmt
4783 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
4784 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4785
4786 t4 = vect_recog_temp_ssa_var (itype, NULL);
4787 def_stmt
4788 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
4789
4790 if (post_shift != 1)
4791 {
4792 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4793
4794 q = vect_recog_temp_ssa_var (itype, NULL);
4795 pattern_stmt
4796 = gimple_build_assign (q, RSHIFT_EXPR, t4,
4797 build_int_cst (itype, post_shift - 1));
4798 }
4799 else
4800 {
4801 q = t4;
4802 pattern_stmt = def_stmt;
4803 }
4804 }
4805 else
4806 {
4807 if (pre_shift >= prec || post_shift >= prec)
4808 return NULL;
4809
4810 /* t1 = oprnd0 >> pre_shift;
4811 t2 = t1 h* ml;
4812 q = t2 >> post_shift; */
4813 if (pre_shift)
4814 {
4815 t1 = vect_recog_temp_ssa_var (itype, NULL);
4816 def_stmt
4817 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
4818 build_int_cst (NULL, pre_shift));
4819 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4820 }
4821 else
4822 t1 = oprnd0;
4823
4824 t2 = vect_recog_temp_ssa_var (itype, NULL);
4825 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
4826 build_int_cst (itype, ml));
4827
4828 if (post_shift)
4829 {
4830 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4831
4832 q = vect_recog_temp_ssa_var (itype, NULL);
4833 def_stmt
4834 = gimple_build_assign (q, RSHIFT_EXPR, t2,
4835 build_int_cst (itype, post_shift));
4836 }
4837 else
4838 q = t2;
4839
4840 pattern_stmt = def_stmt;
4841 }
4842 }
4843 else
4844 {
4845 unsigned HOST_WIDE_INT ml;
4846 int post_shift;
4847 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
4848 unsigned HOST_WIDE_INT abs_d;
4849 bool add = false;
4850 tree t1, t2, t3, t4;
4851
4852 /* Give up for -1. */
4853 if (d == -1)
4854 return NULL;
4855
4856 /* Since d might be INT_MIN, we have to cast to
4857 unsigned HOST_WIDE_INT before negating to avoid
4858 undefined signed overflow. */
4859 abs_d = (d >= 0
4860 ? (unsigned HOST_WIDE_INT) d
4861 : - (unsigned HOST_WIDE_INT) d);
4862
4863 /* n rem d = n rem -d */
4864 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
4865 {
4866 d = abs_d;
4867 oprnd1 = build_int_cst (itype, abs_d);
4868 }
4869 if (HOST_BITS_PER_WIDE_INT >= prec
4870 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
4871 /* This case is not handled correctly below. */
4872 return NULL;
4873
4874 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
4875 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
4876 {
4877 add = true;
4878 ml |= HOST_WIDE_INT_M1U << (prec - 1);
4879 }
4880 if (post_shift >= prec)
4881 return NULL;
4882
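/* For example, for a 32-bit itype and d = 3, choose_multiplier gives the
classic constants ml = 0x55555556 and post_shift = 0, so the division
becomes q = (oprnd0 h* 0x55555556) - (oprnd0 >> 31). */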
4883 /* t1 = oprnd0 h* ml; */
4884 t1 = vect_recog_temp_ssa_var (itype, NULL);
4885 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
4886 build_int_cst (itype, ml));
4887
4888 if (add)
4889 {
4890 /* t2 = t1 + oprnd0; */
4891 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4892 t2 = vect_recog_temp_ssa_var (itype, NULL);
4893 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
4894 }
4895 else
4896 t2 = t1;
4897
4898 if (post_shift)
4899 {
4900 /* t3 = t2 >> post_shift; */
4901 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4902 t3 = vect_recog_temp_ssa_var (itype, NULL);
4903 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
4904 build_int_cst (itype, post_shift));
4905 }
4906 else
4907 t3 = t2;
4908
4909 int msb = 1;
4910 value_range r;
4911 get_range_query (cfun)->range_of_expr (r, oprnd0);
4912 if (!r.varying_p () && !r.undefined_p ())
4913 {
4914 if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
4915 msb = 0;
4916 else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
4917 msb = -1;
4918 }
4919
4920 if (msb == 0 && d >= 0)
4921 {
4922 /* q = t3; */
4923 q = t3;
4924 pattern_stmt = def_stmt;
4925 }
4926 else
4927 {
4928 /* t4 = oprnd0 >> (prec - 1);
4929 or if we know from VRP that oprnd0 >= 0
4930 t4 = 0;
4931 or if we know from VRP that oprnd0 < 0
4932 t4 = -1; */
4933 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4934 t4 = vect_recog_temp_ssa_var (itype, NULL);
4935 if (msb != 1)
4936 def_stmt = gimple_build_assign (t4, INTEGER_CST,
4937 build_int_cst (itype, msb));
4938 else
4939 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
4940 build_int_cst (itype, prec - 1));
4941 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4942
4943 /* q = t3 - t4; or q = t4 - t3; */
4944 q = vect_recog_temp_ssa_var (itype, NULL);
4945 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
4946 d < 0 ? t3 : t4);
4947 }
4948 }
4949
4950 if (rhs_code == TRUNC_MOD_EXPR)
4951 {
4952 tree r, t1;
4953
4954 /* We divided. Now finish by:
4955 t1 = q * oprnd1;
4956 r = oprnd0 - t1; */
4957 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
4958
4959 t1 = vect_recog_temp_ssa_var (itype, NULL);
4960 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
4961 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4962
4963 r = vect_recog_temp_ssa_var (itype, NULL);
4964 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
4965 }
4966
4967 /* Pattern detected. */
4968 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
4969
4970 *type_out = vectype;
4971 return pattern_stmt;
4972 }
4973
4974 /* Function vect_recog_mixed_size_cond_pattern
4975
4976 Try to find the following pattern:
4977
4978 type x_t, y_t;
4979 TYPE a_T, b_T, c_T;
4980 loop:
4981 S1 a_T = x_t CMP y_t ? b_T : c_T;
4982
4983 where type 'TYPE' is an integral type which has a different size
4984 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
4985 than 'type', the constants need to fit into an integer type
4986 with the same width as 'type') or results of conversion from 'type'.
4987
4988 Input:
4989
4990 * STMT_VINFO: The stmt from which the pattern search begins.
4991
4992 Output:
4993
4994 * TYPE_OUT: The type of the output of this pattern.
4995
4996 * Return value: A new stmt that will be used to replace the pattern.
4997 Additionally a def_stmt is added.
4998
4999 a_it = x_t CMP y_t ? b_it : c_it;
5000 a_T = (TYPE) a_it; */
5001
5002 static gimple *
5003 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
5004 stmt_vec_info stmt_vinfo, tree *type_out)
5005 {
5006 gimple *last_stmt = stmt_vinfo->stmt;
5007 tree cond_expr, then_clause, else_clause;
5008 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
5009 gimple *pattern_stmt, *def_stmt;
5010 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
5011 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
5012 bool promotion;
5013 tree comp_scalar_type;
5014
5015 if (!is_gimple_assign (last_stmt)
5016 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
5017 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
5018 return NULL;
5019
5020 cond_expr = gimple_assign_rhs1 (last_stmt);
5021 then_clause = gimple_assign_rhs2 (last_stmt);
5022 else_clause = gimple_assign_rhs3 (last_stmt);
5023
5024 if (!COMPARISON_CLASS_P (cond_expr))
5025 return NULL;
5026
5027 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
5028 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
5029 if (comp_vectype == NULL_TREE)
5030 return NULL;
5031
5032 type = TREE_TYPE (gimple_assign_lhs (last_stmt));
5033 if (types_compatible_p (type, comp_scalar_type)
5034 || ((TREE_CODE (then_clause) != INTEGER_CST
5035 || TREE_CODE (else_clause) != INTEGER_CST)
5036 && !INTEGRAL_TYPE_P (comp_scalar_type))
5037 || !INTEGRAL_TYPE_P (type))
5038 return NULL;
5039
5040 if ((TREE_CODE (then_clause) != INTEGER_CST
5041 && !type_conversion_p (vinfo, then_clause, false,
5042 &orig_type0, &def_stmt0, &promotion))
5043 || (TREE_CODE (else_clause) != INTEGER_CST
5044 && !type_conversion_p (vinfo, else_clause, false,
5045 &orig_type1, &def_stmt1, &promotion)))
5046 return NULL;
5047
5048 if (orig_type0 && orig_type1
5049 && !types_compatible_p (orig_type0, orig_type1))
5050 return NULL;
5051
5052 if (orig_type0)
5053 {
5054 if (!types_compatible_p (orig_type0, comp_scalar_type))
5055 return NULL;
5056 then_clause = gimple_assign_rhs1 (def_stmt0);
5057 itype = orig_type0;
5058 }
5059
5060 if (orig_type1)
5061 {
5062 if (!types_compatible_p (orig_type1, comp_scalar_type))
5063 return NULL;
5064 else_clause = gimple_assign_rhs1 (def_stmt1);
5065 itype = orig_type1;
5066 }
5067
5068
5069 HOST_WIDE_INT cmp_mode_size
5070 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
5071
5072 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
5073 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
5074 return NULL;
5075
5076 vectype = get_vectype_for_scalar_type (vinfo, type);
5077 if (vectype == NULL_TREE)
5078 return NULL;
5079
5080 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
5081 return NULL;
5082
5083 if (itype == NULL_TREE)
5084 itype = build_nonstandard_integer_type (cmp_mode_size,
5085 TYPE_UNSIGNED (type));
5086
5087 if (itype == NULL_TREE
5088 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
5089 return NULL;
5090
5091 vecitype = get_vectype_for_scalar_type (vinfo, itype);
5092 if (vecitype == NULL_TREE)
5093 return NULL;
5094
5095 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
5096 return NULL;
5097
5098 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
5099 {
5100 if ((TREE_CODE (then_clause) == INTEGER_CST
5101 && !int_fits_type_p (then_clause, itype))
5102 || (TREE_CODE (else_clause) == INTEGER_CST
5103 && !int_fits_type_p (else_clause, itype)))
5104 return NULL;
5105 }
5106
5107 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5108 COND_EXPR, unshare_expr (cond_expr),
5109 fold_convert (itype, then_clause),
5110 fold_convert (itype, else_clause));
5111 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
5112 NOP_EXPR, gimple_assign_lhs (def_stmt));
5113
5114 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
5115 *type_out = vectype;
5116
5117 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
5118
5119 return pattern_stmt;
5120 }
5121
5122
5123 /* Helper function of vect_recog_bool_pattern. Called recursively, return
5124 true if bool VAR can and should be optimized that way. Assume it shouldn't
5125 in case it's a result of a comparison which can be directly vectorized into
5126 a vector comparison. Fills in STMTS with all stmts visited during the
5127 walk. */
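
/* For instance (an illustrative case, not taken from a testcase), for

     b_1 = (x_2 == 1) | (y_3 == 2);

   the walk visits both comparisons and the IOR, adds them to STMTS and
   returns true, provided the target cannot vectorize the comparisons
   directly into mask-producing vector comparisons (the default case
   below returns false when expand_vec_cmp_expr_p succeeds).  */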
5128
5129 static bool
5130 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
5131 {
5132 tree rhs1;
5133 enum tree_code rhs_code;
5134
5135 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
5136 if (!def_stmt_info)
5137 return false;
5138
5139 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
5140 if (!def_stmt)
5141 return false;
5142
5143 if (stmts.contains (def_stmt))
5144 return true;
5145
5146 rhs1 = gimple_assign_rhs1 (def_stmt);
5147 rhs_code = gimple_assign_rhs_code (def_stmt);
5148 switch (rhs_code)
5149 {
5150 case SSA_NAME:
5151 if (! check_bool_pattern (rhs1, vinfo, stmts))
5152 return false;
5153 break;
5154
5155 CASE_CONVERT:
5156 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
5157 return false;
5158 if (! check_bool_pattern (rhs1, vinfo, stmts))
5159 return false;
5160 break;
5161
5162 case BIT_NOT_EXPR:
5163 if (! check_bool_pattern (rhs1, vinfo, stmts))
5164 return false;
5165 break;
5166
5167 case BIT_AND_EXPR:
5168 case BIT_IOR_EXPR:
5169 case BIT_XOR_EXPR:
5170 if (! check_bool_pattern (rhs1, vinfo, stmts)
5171 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
5172 return false;
5173 break;
5174
5175 default:
5176 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5177 {
5178 tree vecitype, comp_vectype;
5179
5180 /* If the comparison can throw, then is_gimple_condexpr will be
5181 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
5182 if (stmt_could_throw_p (cfun, def_stmt))
5183 return false;
5184
5185 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
5186 if (comp_vectype == NULL_TREE)
5187 return false;
5188
5189 tree mask_type = get_mask_type_for_scalar_type (vinfo,
5190 TREE_TYPE (rhs1));
5191 if (mask_type
5192 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
5193 return false;
5194
5195 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
5196 {
5197 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
5198 tree itype
5199 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
5200 vecitype = get_vectype_for_scalar_type (vinfo, itype);
5201 if (vecitype == NULL_TREE)
5202 return false;
5203 }
5204 else
5205 vecitype = comp_vectype;
5206 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
5207 return false;
5208 }
5209 else
5210 return false;
5211 break;
5212 }
5213
5214 bool res = stmts.add (def_stmt);
5215 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
5216 gcc_assert (!res);
5217
5218 return true;
5219 }
5220
5221
5222 /* Helper function of adjust_bool_pattern. Add a cast of VAR to TYPE,
5223 appending the cast statement to STMT_INFO's pattern sequence, and
5224 return the result of the cast. */
5225
5226 static tree
5227 adjust_bool_pattern_cast (vec_info *vinfo,
5228 tree type, tree var, stmt_vec_info stmt_info)
5229 {
5230 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
5231 NOP_EXPR, var);
5232 append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
5233 get_vectype_for_scalar_type (vinfo, type));
5234 return gimple_assign_lhs (cast_stmt);
5235 }
5236
5237 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
5238 VAR is an SSA_NAME that should be transformed from bool to a wider integer
5239 type, OUT_TYPE is the desired final integer type of the whole pattern.
5240 STMT_INFO is the info of the pattern root and is where pattern stmts should
5241 be associated with. DEFS is a map of pattern defs. */
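
/* As a minimal illustration (names invented for exposition): if VAR is
   defined by c_b = a_b ^ b_b and DEFS already maps a_b and b_b to integer
   temporaries a_T and b_T of equal precision, the BIT_XOR_EXPR case below
   emits c_T = a_T ^ b_T as a pattern def and records c_b -> c_T in DEFS.  */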
5242
5243 static void
5244 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
5245 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
5246 {
5247 gimple *stmt = SSA_NAME_DEF_STMT (var);
5248 enum tree_code rhs_code, def_rhs_code;
5249 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
5250 location_t loc;
5251 gimple *pattern_stmt, *def_stmt;
5252 tree trueval = NULL_TREE;
5253
5254 rhs1 = gimple_assign_rhs1 (stmt);
5255 rhs2 = gimple_assign_rhs2 (stmt);
5256 rhs_code = gimple_assign_rhs_code (stmt);
5257 loc = gimple_location (stmt);
5258 switch (rhs_code)
5259 {
5260 case SSA_NAME:
5261 CASE_CONVERT:
5262 irhs1 = *defs.get (rhs1);
5263 itype = TREE_TYPE (irhs1);
5264 pattern_stmt
5265 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5266 SSA_NAME, irhs1);
5267 break;
5268
5269 case BIT_NOT_EXPR:
5270 irhs1 = *defs.get (rhs1);
5271 itype = TREE_TYPE (irhs1);
5272 pattern_stmt
5273 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5274 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
5275 break;
5276
5277 case BIT_AND_EXPR:
5278 /* Try to optimize x = y & (a < b ? 1 : 0); into
5279 x = (a < b ? y : 0);
5280
5281 E.g. for:
5282 bool a_b, b_b, c_b;
5283 TYPE d_T;
5284
5285 S1 a_b = x1 CMP1 y1;
5286 S2 b_b = x2 CMP2 y2;
5287 S3 c_b = a_b & b_b;
5288 S4 d_T = (TYPE) c_b;
5289
5290 we would normally emit:
5291
5292 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5293 S2' b_T = x2 CMP2 y2 ? 1 : 0;
5294 S3' c_T = a_T & b_T;
5295 S4' d_T = c_T;
5296
5297 but we can save one stmt by using the
5298 result of one of the COND_EXPRs in the other COND_EXPR and leave
5299 the BIT_AND_EXPR stmt out:
5300
5301 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5302 S3' c_T = x2 CMP2 y2 ? a_T : 0;
5303 S4' d_T = c_T;
5304
5305 At least when VEC_COND_EXPR is implemented using masks
5306 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
5307 computes the comparison mask and ANDs it, in one case with an
5308 all-ones vector, in the other case with a vector register.
5309 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
5310 often more expensive. */
5311 def_stmt = SSA_NAME_DEF_STMT (rhs2);
5312 def_rhs_code = gimple_assign_rhs_code (def_stmt);
5313 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
5314 {
5315 irhs1 = *defs.get (rhs1);
5316 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
5317 if (TYPE_PRECISION (TREE_TYPE (irhs1))
5318 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
5319 {
5320 rhs_code = def_rhs_code;
5321 rhs1 = def_rhs1;
5322 rhs2 = gimple_assign_rhs2 (def_stmt);
5323 trueval = irhs1;
5324 goto do_compare;
5325 }
5326 else
5327 irhs2 = *defs.get (rhs2);
5328 goto and_ior_xor;
5329 }
5330 def_stmt = SSA_NAME_DEF_STMT (rhs1);
5331 def_rhs_code = gimple_assign_rhs_code (def_stmt);
5332 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
5333 {
5334 irhs2 = *defs.get (rhs2);
5335 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
5336 if (TYPE_PRECISION (TREE_TYPE (irhs2))
5337 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
5338 {
5339 rhs_code = def_rhs_code;
5340 rhs1 = def_rhs1;
5341 rhs2 = gimple_assign_rhs2 (def_stmt);
5342 trueval = irhs2;
5343 goto do_compare;
5344 }
5345 else
5346 irhs1 = *defs.get (rhs1);
5347 goto and_ior_xor;
5348 }
5349 /* FALLTHRU */
5350 case BIT_IOR_EXPR:
5351 case BIT_XOR_EXPR:
5352 irhs1 = *defs.get (rhs1);
5353 irhs2 = *defs.get (rhs2);
5354 and_ior_xor:
5355 if (TYPE_PRECISION (TREE_TYPE (irhs1))
5356 != TYPE_PRECISION (TREE_TYPE (irhs2)))
5357 {
5358 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
5359 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
5360 int out_prec = TYPE_PRECISION (out_type);
5361 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
5362 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
5363 stmt_info);
5364 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
5365 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
5366 stmt_info);
5367 else
5368 {
5369 irhs1 = adjust_bool_pattern_cast (vinfo,
5370 out_type, irhs1, stmt_info);
5371 irhs2 = adjust_bool_pattern_cast (vinfo,
5372 out_type, irhs2, stmt_info);
5373 }
5374 }
5375 itype = TREE_TYPE (irhs1);
5376 pattern_stmt
5377 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5378 rhs_code, irhs1, irhs2);
5379 break;
5380
5381 default:
5382 do_compare:
5383 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
5384 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
5385 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
5386 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
5387 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
5388 {
5389 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
5390 itype
5391 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
5392 }
5393 else
5394 itype = TREE_TYPE (rhs1);
5395 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
5396 if (trueval == NULL_TREE)
5397 trueval = build_int_cst (itype, 1);
5398 else
5399 gcc_checking_assert (useless_type_conversion_p (itype,
5400 TREE_TYPE (trueval)));
5401 pattern_stmt
5402 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5403 COND_EXPR, cond_expr, trueval,
5404 build_int_cst (itype, 0));
5405 break;
5406 }
5407
5408 gimple_set_location (pattern_stmt, loc);
5409 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
5410 get_vectype_for_scalar_type (vinfo, itype));
5411 defs.put (var, gimple_assign_lhs (pattern_stmt));
5412 }
5413
5414 /* Comparison function to qsort a vector of gimple stmts by UID. */
5415
5416 static int
5417 sort_after_uid (const void *p1, const void *p2)
5418 {
5419 const gimple *stmt1 = *(const gimple * const *)p1;
5420 const gimple *stmt2 = *(const gimple * const *)p2;
5421 return gimple_uid (stmt1) - gimple_uid (stmt2);
5422 }
5423
5424 /* Create pattern stmts for all stmts participating in the bool pattern
5425 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
5426 OUT_TYPE. Return the def of the pattern root. */
5427
5428 static tree
5429 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
5430 tree out_type, stmt_vec_info stmt_info)
5431 {
5432 /* Gather original stmts in the bool pattern in their order of appearance
5433 in the IL. */
5434 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
5435 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
5436 i != bool_stmt_set.end (); ++i)
5437 bool_stmts.quick_push (*i);
5438 bool_stmts.qsort (sort_after_uid);
5439
5440 /* Now process them in that order, producing pattern stmts. */
5441 hash_map <tree, tree> defs;
5442 for (unsigned i = 0; i < bool_stmts.length (); ++i)
5443 adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
5444 out_type, stmt_info, defs);
5445
5446 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
5447 gimple *pattern_stmt
5448 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5449 return gimple_assign_lhs (pattern_stmt);
5450 }
5451
5452 /* Return the proper type for converting bool VAR into
5453 an integer value or NULL_TREE if no such type exists.
5454 The type is chosen so that the converted value has the
5455 same number of elements as VAR's vector type. */
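
/* E.g. (illustrative) if VAR is a boolean set from a comparison of two
   32-bit integers, its recorded mask precision will typically be 32 and the
   function returns a 32-bit unsigned integer type, so that the conversion
   keeps one integer lane per mask element.  */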
5456
5457 static tree
5458 integer_type_for_mask (tree var, vec_info *vinfo)
5459 {
5460 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5461 return NULL_TREE;
5462
5463 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
5464 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
5465 return NULL_TREE;
5466
5467 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
5468 }
5469
5470 /* Function vect_recog_bool_pattern
5471
5472 Try to find pattern like following:
5473
5474 bool a_b, b_b, c_b, d_b, e_b;
5475 TYPE f_T;
5476 loop:
5477 S1 a_b = x1 CMP1 y1;
5478 S2 b_b = x2 CMP2 y2;
5479 S3 c_b = a_b & b_b;
5480 S4 d_b = x3 CMP3 y3;
5481 S5 e_b = c_b | d_b;
5482 S6 f_T = (TYPE) e_b;
5483
5484 where type 'TYPE' is an integral type. Or a similar pattern
5485 ending in
5486
5487 S6 f_Y = e_b ? r_Y : s_Y;
5488
5489 as results from if-conversion of a complex condition.
5490
5491 Input:
5492
5493 * STMT_VINFO: The stmt at the end from which the pattern
5494 search begins, i.e. cast of a bool to
5495 an integer type.
5496
5497 Output:
5498
5499 * TYPE_OUT: The type of the output of this pattern.
5500
5501 * Return value: A new stmt that will be used to replace the pattern.
5502
5503 Assuming the size of TYPE is the same as the size of all comparisons
5504 (otherwise some casts would be added where needed), for the above
5505 sequence we create related pattern stmts:
5506 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5507 S3' c_T = x2 CMP2 y2 ? a_T : 0;
5508 S4' d_T = x3 CMP3 y3 ? 1 : 0;
5509 S5' e_T = c_T | d_T;
5510 S6' f_T = e_T;
5511
5512 Instead of the above S3' we could emit:
5513 S2' b_T = x2 CMP2 y2 ? 1 : 0;
5514 S3' c_T = a_T | b_T;
5515 but the above is more efficient. */
5516
5517 static gimple *
5518 vect_recog_bool_pattern (vec_info *vinfo,
5519 stmt_vec_info stmt_vinfo, tree *type_out)
5520 {
5521 gimple *last_stmt = stmt_vinfo->stmt;
5522 enum tree_code rhs_code;
5523 tree var, lhs, rhs, vectype;
5524 gimple *pattern_stmt;
5525
5526 if (!is_gimple_assign (last_stmt))
5527 return NULL;
5528
5529 var = gimple_assign_rhs1 (last_stmt);
5530 lhs = gimple_assign_lhs (last_stmt);
5531 rhs_code = gimple_assign_rhs_code (last_stmt);
5532
5533 if (rhs_code == VIEW_CONVERT_EXPR)
5534 var = TREE_OPERAND (var, 0);
5535
5536 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5537 return NULL;
5538
5539 hash_set<gimple *> bool_stmts;
5540
5541 if (CONVERT_EXPR_CODE_P (rhs_code)
5542 || rhs_code == VIEW_CONVERT_EXPR)
5543 {
5544 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
5545 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5546 return NULL;
5547 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5548
5549 if (check_bool_pattern (var, vinfo, bool_stmts))
5550 {
5551 rhs = adjust_bool_stmts (vinfo, bool_stmts,
5552 TREE_TYPE (lhs), stmt_vinfo);
5553 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5554 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
5555 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
5556 else
5557 pattern_stmt
5558 = gimple_build_assign (lhs, NOP_EXPR, rhs);
5559 }
5560 else
5561 {
5562 tree type = integer_type_for_mask (var, vinfo);
5563 tree cst0, cst1, tmp;
5564
5565 if (!type)
5566 return NULL;
5567
5568 /* We may directly use the cond with the narrowed type to avoid
5569 multiple cond exprs with subsequent result packing, and instead
5570 perform a single cond with a packed mask. In the widening case
5571 it is better to do the cond first and then extract the
5572 results. */
5573 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
5574 type = TREE_TYPE (lhs);
5575
5576 cst0 = build_int_cst (type, 0);
5577 cst1 = build_int_cst (type, 1);
5578 tmp = vect_recog_temp_ssa_var (type, NULL);
5579 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
5580
5581 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
5582 {
5583 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
5584 append_pattern_def_seq (vinfo, stmt_vinfo,
5585 pattern_stmt, new_vectype);
5586
5587 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5588 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
5589 }
5590 }
5591
5592 *type_out = vectype;
5593 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
5594
5595 return pattern_stmt;
5596 }
5597 else if (rhs_code == COND_EXPR
5598 && TREE_CODE (var) == SSA_NAME)
5599 {
5600 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5601 if (vectype == NULL_TREE)
5602 return NULL;
5603
5604 /* Build a scalar type for the boolean result that when
5605 vectorized matches the vector type of the result in
5606 size and number of elements. */
5607 unsigned prec
5608 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
5609 TYPE_VECTOR_SUBPARTS (vectype));
5610
5611 tree type
5612 = build_nonstandard_integer_type (prec,
5613 TYPE_UNSIGNED (TREE_TYPE (var)));
5614 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
5615 return NULL;
5616
5617 if (check_bool_pattern (var, vinfo, bool_stmts))
5618 var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
5619 else if (integer_type_for_mask (var, vinfo))
5620 return NULL;
5621
5622 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5623 pattern_stmt
5624 = gimple_build_assign (lhs, COND_EXPR,
5625 build2 (NE_EXPR, boolean_type_node,
5626 var, build_int_cst (TREE_TYPE (var), 0)),
5627 gimple_assign_rhs2 (last_stmt),
5628 gimple_assign_rhs3 (last_stmt));
5629 *type_out = vectype;
5630 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
5631
5632 return pattern_stmt;
5633 }
5634 else if (rhs_code == SSA_NAME
5635 && STMT_VINFO_DATA_REF (stmt_vinfo))
5636 {
5637 stmt_vec_info pattern_stmt_info;
5638 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5639 if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
5640 return NULL;
5641
5642 if (check_bool_pattern (var, vinfo, bool_stmts))
5643 rhs = adjust_bool_stmts (vinfo, bool_stmts,
5644 TREE_TYPE (vectype), stmt_vinfo);
5645 else
5646 {
5647 tree type = integer_type_for_mask (var, vinfo);
5648 tree cst0, cst1, new_vectype;
5649
5650 if (!type)
5651 return NULL;
5652
5653 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
5654 type = TREE_TYPE (vectype);
5655
5656 cst0 = build_int_cst (type, 0);
5657 cst1 = build_int_cst (type, 1);
5658 new_vectype = get_vectype_for_scalar_type (vinfo, type);
5659
5660 rhs = vect_recog_temp_ssa_var (type, NULL);
5661 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
5662 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
5663 }
5664
5665 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
5666 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
5667 {
5668 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5669 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
5670 append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
5671 rhs = rhs2;
5672 }
5673 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
5674 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
5675 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
5676 *type_out = vectype;
5677 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
5678
5679 return pattern_stmt;
5680 }
5681 else
5682 return NULL;
5683 }
5684
5685
5686 /* A helper for vect_recog_mask_conversion_pattern. Build
5687 conversion of MASK to a type suitable for masking VECTYPE.
5688 The built statement gets the required vectype and is appended to
5689 the pattern sequence of STMT_VINFO.
5690
5691 Return converted mask. */
5692
5693 static tree
5694 build_mask_conversion (vec_info *vinfo,
5695 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
5696 {
5697 gimple *stmt;
5698 tree masktype, tmp;
5699
5700 masktype = truth_type_for (vectype);
5701 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
5702 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
5703 append_pattern_def_seq (vinfo, stmt_vinfo,
5704 stmt, masktype, TREE_TYPE (vectype));
5705
5706 return tmp;
5707 }
5708
5709
5710 /* Function vect_recog_mask_conversion_pattern
5711
5712 Try to find statements which require boolean type
5713 converison. Additional conversion statements are
5714 added to handle such cases. For example:
5715
5716 bool m_1, m_2, m_3;
5717 int i_4, i_5;
5718 double d_6, d_7;
5719 char c_1, c_2, c_3;
5720
5721 S1 m_1 = i_4 > i_5;
5722 S2 m_2 = d_6 < d_7;
5723 S3 m_3 = m_1 & m_2;
5724 S4 c_1 = m_3 ? c_2 : c_3;
5725
5726 Will be transformed into:
5727
5728 S1 m_1 = i_4 > i_5;
5729 S2 m_2 = d_6 < d_7;
5730 S3'' m_2' = (_Bool[bitsize=32])m_2
5731 S3' m_3' = m_1 & m_2';
5732 S4'' m_3'' = (_Bool[bitsize=8])m_3'
5733 S4' c_1' = m_3'' ? c_2 : c_3; */
5734
5735 static gimple *
5736 vect_recog_mask_conversion_pattern (vec_info *vinfo,
5737 stmt_vec_info stmt_vinfo, tree *type_out)
5738 {
5739 gimple *last_stmt = stmt_vinfo->stmt;
5740 enum tree_code rhs_code;
5741 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
5742 tree vectype1, vectype2;
5743 stmt_vec_info pattern_stmt_info;
5744 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
5745 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
5746
5747 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
5748 if (is_gimple_call (last_stmt)
5749 && gimple_call_internal_p (last_stmt))
5750 {
5751 gcall *pattern_stmt;
5752
5753 internal_fn ifn = gimple_call_internal_fn (last_stmt);
5754 int mask_argno = internal_fn_mask_index (ifn);
5755 if (mask_argno < 0)
5756 return NULL;
5757
5758 bool store_p = internal_store_fn_p (ifn);
5759 if (store_p)
5760 {
5761 int rhs_index = internal_fn_stored_value_index (ifn);
5762 tree rhs = gimple_call_arg (last_stmt, rhs_index);
5763 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
5764 }
5765 else
5766 {
5767 lhs = gimple_call_lhs (last_stmt);
5768 if (!lhs)
5769 return NULL;
5770 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5771 }
5772
5773 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
5774 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
5775 if (!mask_arg_type)
5776 return NULL;
5777 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
5778
5779 if (!vectype1 || !vectype2
5780 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
5781 TYPE_VECTOR_SUBPARTS (vectype2)))
5782 return NULL;
5783
5784 tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
5785
5786 auto_vec<tree, 8> args;
5787 unsigned int nargs = gimple_call_num_args (last_stmt);
5788 args.safe_grow (nargs, true);
5789 for (unsigned int i = 0; i < nargs; ++i)
5790 args[i] = ((int) i == mask_argno
5791 ? tmp
5792 : gimple_call_arg (last_stmt, i));
5793 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
5794
5795 if (!store_p)
5796 {
5797 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5798 gimple_call_set_lhs (pattern_stmt, lhs);
5799 }
5800 gimple_call_set_nothrow (pattern_stmt, true);
5801
5802 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
5803 if (STMT_VINFO_DATA_REF (stmt_vinfo))
5804 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
5805
5806 *type_out = vectype1;
5807 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
5808
5809 return pattern_stmt;
5810 }
5811
5812 if (!is_gimple_assign (last_stmt))
5813 return NULL;
5814
5815 gimple *pattern_stmt;
5816 lhs = gimple_assign_lhs (last_stmt);
5817 rhs1 = gimple_assign_rhs1 (last_stmt);
5818 rhs_code = gimple_assign_rhs_code (last_stmt);
5819
5820 /* Check for cond expression requiring mask conversion. */
5821 if (rhs_code == COND_EXPR)
5822 {
5823 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5824
5825 if (TREE_CODE (rhs1) == SSA_NAME)
5826 {
5827 rhs1_type = integer_type_for_mask (rhs1, vinfo);
5828 if (!rhs1_type)
5829 return NULL;
5830 }
5831 else if (COMPARISON_CLASS_P (rhs1))
5832 {
5833 /* Check whether we're comparing scalar booleans and (if so)
5834 whether a better mask type exists than the mask associated
5835 with boolean-sized elements. This avoids unnecessary packs
5836 and unpacks if the booleans are set from comparisons of
5837 wider types. E.g. in:
5838
5839 int x1, x2, x3, x4, y1, y2;
5840 ...
5841 bool b1 = (x1 == x2);
5842 bool b2 = (x3 == x4);
5843 ... = b1 == b2 ? y1 : y2;
5844
5845 it is better for b1 and b2 to use the mask type associated
5846 with int elements rather than bool (byte) elements. */
5847 rhs1_op0 = TREE_OPERAND (rhs1, 0);
5848 rhs1_op1 = TREE_OPERAND (rhs1, 1);
5849 if (!rhs1_op0 || !rhs1_op1)
5850 return NULL;
5851 rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
5852 rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
5853
5854 if (!rhs1_op0_type)
5855 rhs1_type = TREE_TYPE (rhs1_op0);
5856 else if (!rhs1_op1_type)
5857 rhs1_type = TREE_TYPE (rhs1_op1);
5858 else if (TYPE_PRECISION (rhs1_op0_type)
5859 != TYPE_PRECISION (rhs1_op1_type))
5860 {
5861 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
5862 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
5863 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
5864 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
5865 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
5866 {
5867 if (abs (tmp0) > abs (tmp1))
5868 rhs1_type = rhs1_op1_type;
5869 else
5870 rhs1_type = rhs1_op0_type;
5871 }
5872 else
5873 rhs1_type = build_nonstandard_integer_type
5874 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
5875 }
5876 else
5877 rhs1_type = rhs1_op0_type;
5878 }
5879 else
5880 return NULL;
5881
5882 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
5883
5884 if (!vectype1 || !vectype2)
5885 return NULL;
5886
5887 /* Continue if a conversion is needed. Also continue if we have
5888 a comparison whose vector type would normally be different from
5889 VECTYPE2 when considered in isolation. In that case we'll
5890 replace the comparison with an SSA name (so that we can record
5891 its vector type) and behave as though the comparison was an SSA
5892 name from the outset. */
5893 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
5894 TYPE_VECTOR_SUBPARTS (vectype2))
5895 && !rhs1_op0_type
5896 && !rhs1_op1_type)
5897 return NULL;
5898
5899 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
5900 in place; we can handle it in vectorizable_condition. This avoids
5901 unnecessary promotion stmts and an increased vectorization factor. */
5902 if (COMPARISON_CLASS_P (rhs1)
5903 && INTEGRAL_TYPE_P (rhs1_type)
5904 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
5905 TYPE_VECTOR_SUBPARTS (vectype2)))
5906 {
5907 enum vect_def_type dt;
5908 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
5909 && dt == vect_external_def
5910 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
5911 && (dt == vect_external_def
5912 || dt == vect_constant_def))
5913 {
5914 tree wide_scalar_type = build_nonstandard_integer_type
5915 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
5916 tree vectype3 = get_vectype_for_scalar_type (vinfo,
5917 wide_scalar_type);
5918 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
5919 return NULL;
5920 }
5921 }
5922
5923 /* If rhs1 is a comparison we need to move it into a
5924 separate statement. */
5925 if (TREE_CODE (rhs1) != SSA_NAME)
5926 {
5927 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
5928 if (rhs1_op0_type
5929 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
5930 rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
5931 vectype2, stmt_vinfo);
5932 if (rhs1_op1_type
5933 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
5934 rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
5935 vectype2, stmt_vinfo);
5936 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
5937 rhs1_op0, rhs1_op1);
5938 rhs1 = tmp;
5939 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
5940 rhs1_type);
5941 }
5942
5943 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
5944 TYPE_VECTOR_SUBPARTS (vectype2)))
5945 tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
5946 else
5947 tmp = rhs1;
5948
5949 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5950 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
5951 gimple_assign_rhs2 (last_stmt),
5952 gimple_assign_rhs3 (last_stmt));
5953
5954 *type_out = vectype1;
5955 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
5956
5957 return pattern_stmt;
5958 }
5959
5960 /* Now check for binary boolean operations requiring conversion for
5961 one of operands. */
5962 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5963 return NULL;
5964
5965 if (rhs_code != BIT_IOR_EXPR
5966 && rhs_code != BIT_XOR_EXPR
5967 && rhs_code != BIT_AND_EXPR
5968 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
5969 return NULL;
5970
5971 rhs2 = gimple_assign_rhs2 (last_stmt);
5972
5973 rhs1_type = integer_type_for_mask (rhs1, vinfo);
5974 rhs2_type = integer_type_for_mask (rhs2, vinfo);
5975
5976 if (!rhs1_type || !rhs2_type
5977 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
5978 return NULL;
5979
5980 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
5981 {
5982 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
5983 if (!vectype1)
5984 return NULL;
5985 rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
5986 }
5987 else
5988 {
5989 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
5990 if (!vectype1)
5991 return NULL;
5992 rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
5993 }
5994
5995 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5996 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
5997
5998 *type_out = vectype1;
5999 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
6000
6001 return pattern_stmt;
6002 }
6003
6004 /* STMT_INFO is a load or store. If the load or store is conditional, return
6005 the boolean condition under which it occurs, otherwise return null. */
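
/* For example (illustrative names), for an internal call along the lines of
   .MASK_STORE (addr_2, 32B, mask__5, val_6) the mask argument mask__5 is
   returned, whereas a plain assignment load or store yields NULL_TREE.  */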
6006
6007 static tree
6008 vect_get_load_store_mask (stmt_vec_info stmt_info)
6009 {
6010 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
6011 {
6012 gcc_assert (gimple_assign_single_p (def_assign));
6013 return NULL_TREE;
6014 }
6015
6016 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
6017 {
6018 internal_fn ifn = gimple_call_internal_fn (def_call);
6019 int mask_index = internal_fn_mask_index (ifn);
6020 return gimple_call_arg (def_call, mask_index);
6021 }
6022
6023 gcc_unreachable ();
6024 }
6025
6026 /* Return MASK if MASK is suitable for masking an operation on vectors
6027 of type VECTYPE, otherwise convert it into such a form and return
6028 the result. Associate any conversion statements with STMT_INFO's
6029 pattern. */
6030
6031 static tree
6032 vect_convert_mask_for_vectype (tree mask, tree vectype,
6033 stmt_vec_info stmt_info, vec_info *vinfo)
6034 {
6035 tree mask_type = integer_type_for_mask (mask, vinfo);
6036 if (mask_type)
6037 {
6038 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
6039 if (mask_vectype
6040 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
6041 TYPE_VECTOR_SUBPARTS (mask_vectype)))
6042 mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
6043 }
6044 return mask;
6045 }
6046
6047 /* Return the equivalent of:
6048
6049 fold_convert (TYPE, VALUE)
6050
6051 with the expectation that the operation will be vectorized.
6052 If new statements are needed, add them as pattern statements
6053 to STMT_INFO. */
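
/* For instance (illustrative), widening a 32-bit offset off_3 to a 64-bit
   TYPE adds the pattern statement patt_4 = (TYPE) off_3 together with the
   vector type for TYPE; if off_3 already has TYPE, it is returned as-is.  */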
6054
6055 static tree
6056 vect_add_conversion_to_pattern (vec_info *vinfo,
6057 tree type, tree value, stmt_vec_info stmt_info)
6058 {
6059 if (useless_type_conversion_p (type, TREE_TYPE (value)))
6060 return value;
6061
6062 tree new_value = vect_recog_temp_ssa_var (type, NULL);
6063 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
6064 append_pattern_def_seq (vinfo, stmt_info, conversion,
6065 get_vectype_for_scalar_type (vinfo, type));
6066 return new_value;
6067 }
6068
6069 /* Try to convert STMT_INFO into a call to a gather load or scatter store
6070 internal function. Return the final statement on success and set
6071 *TYPE_OUT to the vector type being loaded or stored.
6072
6073 This function only handles gathers and scatters that were recognized
6074 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
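
/* As an illustrative sketch (names invented for exposition), a masked
   gather load of the form ... = y[idx[i]] under a loop mask becomes a
   pattern call along the lines of

     patt_1 = .MASK_GATHER_LOAD (base, offset, scale, zero, mask);

   mirroring the 5-argument internal call built below, while the
   unconditional case uses the 4-argument form without the mask.  */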
6075
6076 static gimple *
6077 vect_recog_gather_scatter_pattern (vec_info *vinfo,
6078 stmt_vec_info stmt_info, tree *type_out)
6079 {
6080 /* Currently we only support this for loop vectorization. */
6081 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6082 if (!loop_vinfo)
6083 return NULL;
6084
6085 /* Make sure that we're looking at a gather load or scatter store. */
6086 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
6087 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6088 return NULL;
6089
6090 /* Get the boolean that controls whether the load or store happens.
6091 This is null if the operation is unconditional. */
6092 tree mask = vect_get_load_store_mask (stmt_info);
6093
6094 /* Make sure that the target supports an appropriate internal
6095 function for the gather/scatter operation. */
6096 gather_scatter_info gs_info;
6097 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
6098 || gs_info.ifn == IFN_LAST)
6099 return NULL;
6100
6101 /* Convert the mask to the right form. */
6102 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
6103 gs_info.element_type);
6104 if (mask)
6105 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
6106 loop_vinfo);
6107 else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
6108 || gs_info.ifn == IFN_MASK_GATHER_LOAD
6109 || gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE
6110 || gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
6111 mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
6112
6113 /* Get the invariant base and non-invariant offset, converting the
6114 latter to the same width as the vector elements. */
6115 tree base = gs_info.base;
6116 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
6117 tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
6118 gs_info.offset, stmt_info);
6119
6120 /* Build the new pattern statement. */
6121 tree scale = size_int (gs_info.scale);
6122 gcall *pattern_stmt;
6123 if (DR_IS_READ (dr))
6124 {
6125 tree zero = build_zero_cst (gs_info.element_type);
6126 if (mask != NULL)
6127 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
6128 offset, scale, zero, mask);
6129 else
6130 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
6131 offset, scale, zero);
6132 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
6133 gimple_call_set_lhs (pattern_stmt, load_lhs);
6134 }
6135 else
6136 {
6137 tree rhs = vect_get_store_rhs (stmt_info);
6138 if (mask != NULL)
6139 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
6140 base, offset, scale, rhs,
6141 mask);
6142 else
6143 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
6144 base, offset, scale, rhs);
6145 }
6146 gimple_call_set_nothrow (pattern_stmt, true);
6147
6148 /* Copy across relevant vectorization info and associate DR with the
6149 new pattern statement instead of the original statement. */
6150 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
6151 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
6152
6153 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6154 *type_out = vectype;
6155 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
6156
6157 return pattern_stmt;
6158 }
6159
6160 /* Return true if TYPE is a non-boolean integer type. These are the types
6161 that we want to consider for narrowing. */
6162
6163 static bool
6164 vect_narrowable_type_p (tree type)
6165 {
6166 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
6167 }
6168
6169 /* Return true if the operation given by CODE can be truncated to N bits
6170 when only N bits of the output are needed. This is only true if bit N+1
6171 of the inputs has no effect on the low N bits of the result. */
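
/* For example, the low 8 bits of a PLUS_EXPR depend only on the low 8 bits
   of its operands, so an addition whose result is only needed to 8 bits can
   itself be done in 8 bits; a right shift does not have this property,
   because higher input bits move down into the low output bits, which is
   why shifts are instead handled specially in
   vect_determine_precisions_from_users.  */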
6172
6173 static bool
6174 vect_truncatable_operation_p (tree_code code)
6175 {
6176 switch (code)
6177 {
6178 case PLUS_EXPR:
6179 case MINUS_EXPR:
6180 case MULT_EXPR:
6181 case BIT_AND_EXPR:
6182 case BIT_IOR_EXPR:
6183 case BIT_XOR_EXPR:
6184 case COND_EXPR:
6185 return true;
6186
6187 default:
6188 return false;
6189 }
6190 }
6191
6192 /* Record that STMT_INFO could be changed from operating on TYPE to
6193 operating on a type with the precision and sign given by PRECISION
6194 and SIGN respectively. PRECISION is an arbitrary bit precision;
6195 it might not be a whole number of bytes. */
6196
6197 static void
6198 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
6199 unsigned int precision, signop sign)
6200 {
6201 /* Round the precision up to a whole number of bytes. */
6202 precision = vect_element_precision (precision);
6203 if (precision < TYPE_PRECISION (type)
6204 && (!stmt_info->operation_precision
6205 || stmt_info->operation_precision > precision))
6206 {
6207 stmt_info->operation_precision = precision;
6208 stmt_info->operation_sign = sign;
6209 }
6210 }
6211
6212 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
6213 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
6214 is an arbitrary bit precision; it might not be a whole number of bytes. */
6215
6216 static void
6217 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
6218 unsigned int min_input_precision)
6219 {
6220 /* This operation in isolation only requires the inputs to have
6221 MIN_INPUT_PRECISION of precision. However, that doesn't mean
6222 that MIN_INPUT_PRECISION is a natural precision for the chain
6223 as a whole. E.g. consider something like:
6224
6225 unsigned short *x, *y;
6226 *y = ((*x & 0xf0) >> 4) | (*y << 4);
6227
6228 The right shift can be done on unsigned chars, and only requires the
6229 result of "*x & 0xf0" to be done on unsigned chars. But taking that
6230 approach would mean turning a natural chain of single-vector unsigned
6231 short operations into one that truncates "*x" and then extends
6232 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
6233 operation and one vector for each unsigned char operation.
6234 This would be a significant pessimization.
6235
6236 Instead only propagate the maximum of this precision and the precision
6237 required by the users of the result. This means that we don't pessimize
6238 the case above but continue to optimize things like:
6239
6240 unsigned char *y;
6241 unsigned short *x;
6242 *y = ((*x & 0xf0) >> 4) | (*y << 4);
6243
6244 Here we would truncate two vectors of *x to a single vector of
6245 unsigned chars and use single-vector unsigned char operations for
6246 everything else, rather than doing two unsigned short copies of
6247 "(*x & 0xf0) >> 4" and then truncating the result. */
6248 min_input_precision = MAX (min_input_precision,
6249 stmt_info->min_output_precision);
6250
6251 if (min_input_precision < TYPE_PRECISION (type)
6252 && (!stmt_info->min_input_precision
6253 || stmt_info->min_input_precision > min_input_precision))
6254 stmt_info->min_input_precision = min_input_precision;
6255 }
6256
6257 /* Subroutine of vect_determine_min_output_precision. Return true if
6258 we can calculate a reduced number of output bits for STMT_INFO,
6259 whose result is LHS. */
6260
6261 static bool
6262 vect_determine_min_output_precision_1 (vec_info *vinfo,
6263 stmt_vec_info stmt_info, tree lhs)
6264 {
6265 /* Take the maximum precision required by users of the result. */
6266 unsigned int precision = 0;
6267 imm_use_iterator iter;
6268 use_operand_p use;
6269 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
6270 {
6271 gimple *use_stmt = USE_STMT (use);
6272 if (is_gimple_debug (use_stmt))
6273 continue;
6274 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
6275 if (!use_stmt_info || !use_stmt_info->min_input_precision)
6276 return false;
6277 /* The input precision recorded for COND_EXPRs applies only to the
6278 "then" and "else" values. */
6279 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
6280 if (assign
6281 && gimple_assign_rhs_code (assign) == COND_EXPR
6282 && use->use != gimple_assign_rhs2_ptr (assign)
6283 && use->use != gimple_assign_rhs3_ptr (assign))
6284 return false;
6285 precision = MAX (precision, use_stmt_info->min_input_precision);
6286 }
6287
6288 if (dump_enabled_p ())
6289 dump_printf_loc (MSG_NOTE, vect_location,
6290 "only the low %d bits of %T are significant\n",
6291 precision, lhs);
6292 stmt_info->min_output_precision = precision;
6293 return true;
6294 }
6295
6296 /* Calculate min_output_precision for STMT_INFO. */
6297
6298 static void
6299 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
6300 {
6301 /* We're only interested in statements with a narrowable result. */
6302 tree lhs = gimple_get_lhs (stmt_info->stmt);
6303 if (!lhs
6304 || TREE_CODE (lhs) != SSA_NAME
6305 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
6306 return;
6307
6308 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
6309 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
6310 }
6311
6312 /* Use range information to decide whether STMT (described by STMT_INFO)
6313 could be done in a narrower type. This is effectively a forward
6314 propagation, since it uses context-independent information that applies
6315 to all users of an SSA name. */
6316
6317 static void
6318 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
6319 {
6320 tree lhs = gimple_assign_lhs (stmt);
6321 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
6322 return;
6323
6324 tree type = TREE_TYPE (lhs);
6325 if (!vect_narrowable_type_p (type))
6326 return;
6327
6328 /* First see whether we have any useful range information for the result. */
6329 unsigned int precision = TYPE_PRECISION (type);
6330 signop sign = TYPE_SIGN (type);
6331 wide_int min_value, max_value;
6332 if (!vect_get_range_info (lhs, &min_value, &max_value))
6333 return;
6334
6335 tree_code code = gimple_assign_rhs_code (stmt);
6336 unsigned int nops = gimple_num_ops (stmt);
6337
6338 if (!vect_truncatable_operation_p (code))
6339 /* Check that all relevant input operands are compatible, and update
6340 [MIN_VALUE, MAX_VALUE] to include their ranges. */
6341 for (unsigned int i = 1; i < nops; ++i)
6342 {
6343 tree op = gimple_op (stmt, i);
6344 if (TREE_CODE (op) == INTEGER_CST)
6345 {
6346 /* Don't require the integer to have RHS_TYPE (which it might
6347 not for things like shift amounts, etc.), but do require it
6348 to fit the type. */
6349 if (!int_fits_type_p (op, type))
6350 return;
6351
6352 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
6353 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
6354 }
6355 else if (TREE_CODE (op) == SSA_NAME)
6356 {
6357 /* Ignore codes that don't take uniform arguments. */
6358 if (!types_compatible_p (TREE_TYPE (op), type))
6359 return;
6360
6361 wide_int op_min_value, op_max_value;
6362 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
6363 return;
6364
6365 min_value = wi::min (min_value, op_min_value, sign);
6366 max_value = wi::max (max_value, op_max_value, sign);
6367 }
6368 else
6369 return;
6370 }
6371
6372 /* Try to switch signed types for unsigned types if we can.
6373 This is better for two reasons. First, unsigned ops tend
6374 to be cheaper than signed ops. Second, it means that we can
6375 handle things like:
6376
6377 signed char c;
6378 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
6379
6380 as:
6381
6382 signed char c;
6383 unsigned short res_1 = (unsigned short) c & 0xff00;
6384 int res = (int) res_1;
6385
6386 where the intermediate result res_1 has unsigned rather than
6387 signed type. */
6388 if (sign == SIGNED && !wi::neg_p (min_value))
6389 sign = UNSIGNED;
6390
6391 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
6392 unsigned int precision1 = wi::min_precision (min_value, sign);
6393 unsigned int precision2 = wi::min_precision (max_value, sign);
6394 unsigned int value_precision = MAX (precision1, precision2);
6395 if (value_precision >= precision)
6396 return;
6397
6398 if (dump_enabled_p ())
6399 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
6400 " without loss of precision: %G",
6401 sign == SIGNED ? "signed" : "unsigned",
6402 value_precision, (gimple *) stmt);
6403
6404 vect_set_operation_type (stmt_info, type, value_precision, sign);
6405 vect_set_min_input_precision (stmt_info, type, value_precision);
6406 }
6407
6408 /* Use information about the users of STMT's result to decide whether
6409 STMT (described by STMT_INFO) could be done in a narrower type.
6410 This is effectively a backward propagation. */
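
/* A worked example with numbers chosen purely for illustration: for
   y = x >> 3 on a 32-bit type with a min_output_precision of 8, the
   RSHIFT_EXPR case below picks operation_precision = 8 + 3 = 11 and
   min_input_precision = 11; for y = x << 3 with the same requirement the
   LSHIFT_EXPR case picks operation_precision = MAX (8, 3 + 1) = 8 and
   min_input_precision = MAX (8, 3) - 3 = 5.  */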
6411
6412 static void
6413 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
6414 {
6415 tree_code code = gimple_assign_rhs_code (stmt);
6416 unsigned int opno = (code == COND_EXPR ? 2 : 1);
6417 tree type = TREE_TYPE (gimple_op (stmt, opno));
6418 if (!vect_narrowable_type_p (type))
6419 return;
6420
6421 unsigned int precision = TYPE_PRECISION (type);
6422 unsigned int operation_precision, min_input_precision;
6423 switch (code)
6424 {
6425 CASE_CONVERT:
6426 /* Only the bits that contribute to the output matter. Don't change
6427 the precision of the operation itself. */
6428 operation_precision = precision;
6429 min_input_precision = stmt_info->min_output_precision;
6430 break;
6431
6432 case LSHIFT_EXPR:
6433 case RSHIFT_EXPR:
6434 {
6435 tree shift = gimple_assign_rhs2 (stmt);
6436 if (TREE_CODE (shift) != INTEGER_CST
6437 || !wi::ltu_p (wi::to_widest (shift), precision))
6438 return;
6439 unsigned int const_shift = TREE_INT_CST_LOW (shift);
6440 if (code == LSHIFT_EXPR)
6441 {
6442 /* Avoid creating an undefined shift.
6443
6444 ??? We could instead use min_output_precision as-is and
6445 optimize out-of-range shifts to zero. However, only
6446 degenerate testcases shift away all their useful input data,
6447 and it isn't natural to drop input operations in the middle
6448 of vectorization. This sort of thing should really be
6449 handled before vectorization. */
6450 operation_precision = MAX (stmt_info->min_output_precision,
6451 const_shift + 1);
6452 /* We need CONST_SHIFT fewer bits of the input. */
6453 min_input_precision = (MAX (operation_precision, const_shift)
6454 - const_shift);
6455 }
6456 else
6457 {
6458 /* We need CONST_SHIFT extra bits to do the operation. */
6459 operation_precision = (stmt_info->min_output_precision
6460 + const_shift);
6461 min_input_precision = operation_precision;
6462 }
6463 break;
6464 }
6465
6466 default:
6467 if (vect_truncatable_operation_p (code))
6468 {
6469 /* Input bit N has no effect on output bits N-1 and lower. */
6470 operation_precision = stmt_info->min_output_precision;
6471 min_input_precision = operation_precision;
6472 break;
6473 }
6474 return;
6475 }
6476
6477 if (operation_precision < precision)
6478 {
6479 if (dump_enabled_p ())
6480 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
6481 " without affecting users: %G",
6482 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
6483 operation_precision, (gimple *) stmt);
6484 vect_set_operation_type (stmt_info, type, operation_precision,
6485 TYPE_SIGN (type));
6486 }
6487 vect_set_min_input_precision (stmt_info, type, min_input_precision);
6488 }
6489
6490 /* Return true if the statement described by STMT_INFO sets a boolean
6491 SSA_NAME and if we know how to vectorize this kind of statement using
6492 vector mask types. */
6493
6494 static bool
6495 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
6496 {
6497 tree lhs = gimple_get_lhs (stmt_info->stmt);
6498 if (!lhs
6499 || TREE_CODE (lhs) != SSA_NAME
6500 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
6501 return false;
6502
6503 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
6504 {
6505 tree_code rhs_code = gimple_assign_rhs_code (assign);
6506 switch (rhs_code)
6507 {
6508 CASE_CONVERT:
6509 case SSA_NAME:
6510 case BIT_NOT_EXPR:
6511 case BIT_IOR_EXPR:
6512 case BIT_XOR_EXPR:
6513 case BIT_AND_EXPR:
6514 return true;
6515
6516 default:
6517 return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
6518 }
6519 }
6520 else if (is_a <gphi *> (stmt_info->stmt))
6521 return true;
6522 return false;
6523 }
6524
6525 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
6526 a vector mask type instead of a normal vector type. Record the
6527 result in STMT_INFO->mask_precision. */
6528
6529 static void
6530 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
6531 {
6532 if (!possible_vector_mask_operation_p (stmt_info))
6533 return;
6534
6535 /* If at least one boolean input uses a vector mask type,
6536 pick the mask type with the narrowest elements.
6537
6538 ??? This is the traditional behavior. It should always produce
6539 the smallest number of operations, but isn't necessarily the
6540 optimal choice. For example, if we have:
6541
6542 a = b & c
6543
6544 where:
6545
6546 - the user of a wants it to have a mask type for 16-bit elements (M16)
6547 - b also uses M16
6548 - c uses a mask type for 8-bit elements (M8)
6549
6550 then picking M8 gives:
6551
6552 - 1 M16->M8 pack for b
6553 - 1 M8 AND for a
6554 - 2 M8->M16 unpacks for the user of a
6555
6556 whereas picking M16 would have given:
6557
6558 - 2 M8->M16 unpacks for c
6559 - 2 M16 ANDs for a
6560
6561 The number of operations are equal, but M16 would have given
6562 a shorter dependency chain and allowed more ILP. */
6563 unsigned int precision = ~0U;
6564 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
6565 {
6566 unsigned int nops = gimple_num_ops (assign);
6567 for (unsigned int i = 1; i < nops; ++i)
6568 {
6569 tree rhs = gimple_op (assign, i);
6570 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
6571 continue;
6572
6573 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
6574 if (!def_stmt_info)
6575 /* Don't let external or constant operands influence the choice.
6576 We can convert them to whichever vector type we pick. */
6577 continue;
6578
6579 if (def_stmt_info->mask_precision)
6580 {
6581 if (precision > def_stmt_info->mask_precision)
6582 precision = def_stmt_info->mask_precision;
6583 }
6584 }
6585
6586 /* If the statement compares two values that shouldn't use vector masks,
6587 try comparing the values as normal scalars instead. */
6588 tree_code rhs_code = gimple_assign_rhs_code (assign);
6589 if (precision == ~0U
6590 && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
6591 {
6592 tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
6593 scalar_mode mode;
6594 tree vectype, mask_type;
6595 if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
6596 && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
6597 && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
6598 && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
6599 precision = GET_MODE_BITSIZE (mode);
6600 }
6601 }
6602 else
6603 {
6604 gphi *phi = as_a <gphi *> (stmt_info->stmt);
6605 for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
6606 {
6607 tree rhs = gimple_phi_arg_def (phi, i);
6608
6609 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
6610 if (!def_stmt_info)
6611 /* Don't let external or constant operands influence the choice.
6612 We can convert them to whichever vector type we pick. */
6613 continue;
6614
6615 if (def_stmt_info->mask_precision)
6616 {
6617 if (precision > def_stmt_info->mask_precision)
6618 precision = def_stmt_info->mask_precision;
6619 }
6620 }
6621 }
6622
6623 if (dump_enabled_p ())
6624 {
6625 if (precision == ~0U)
6626 dump_printf_loc (MSG_NOTE, vect_location,
6627 "using normal nonmask vectors for %G",
6628 stmt_info->stmt);
6629 else
6630 dump_printf_loc (MSG_NOTE, vect_location,
6631 "using boolean precision %d for %G",
6632 precision, stmt_info->stmt);
6633 }
6634
6635 stmt_info->mask_precision = precision;
6636 }
6637
6638 /* Handle vect_determine_precisions for STMT_INFO, given that we
6639 have already done so for the users of its result. */
6640
6641 void
6642 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
6643 {
6644 vect_determine_min_output_precision (vinfo, stmt_info);
6645 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
6646 {
6647 vect_determine_precisions_from_range (stmt_info, stmt);
6648 vect_determine_precisions_from_users (stmt_info, stmt);
6649 }
6650 }
6651
6652 /* Walk backwards through the vectorizable region to determine the
6653 values of these fields:
6654
6655 - min_output_precision
6656 - min_input_precision
6657 - operation_precision
6658 - operation_sign. */
6659
6660 void
6661 vect_determine_precisions (vec_info *vinfo)
6662 {
6663 DUMP_VECT_SCOPE ("vect_determine_precisions");
6664
6665 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
6666 {
6667 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6668 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
6669 unsigned int nbbs = loop->num_nodes;
6670
6671 for (unsigned int i = 0; i < nbbs; i++)
6672 {
6673 basic_block bb = bbs[i];
6674 for (auto gsi = gsi_start_phis (bb);
6675 !gsi_end_p (gsi); gsi_next (&gsi))
6676 {
6677 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6678 if (stmt_info)
6679 vect_determine_mask_precision (vinfo, stmt_info);
6680 }
6681 for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
6682 if (!is_gimple_debug (gsi_stmt (si)))
6683 vect_determine_mask_precision
6684 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
6685 }
6686 for (unsigned int i = 0; i < nbbs; i++)
6687 {
6688 basic_block bb = bbs[nbbs - i - 1];
6689 for (gimple_stmt_iterator si = gsi_last_bb (bb);
6690 !gsi_end_p (si); gsi_prev (&si))
6691 if (!is_gimple_debug (gsi_stmt (si)))
6692 vect_determine_stmt_precisions
6693 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
6694 for (auto gsi = gsi_start_phis (bb);
6695 !gsi_end_p (gsi); gsi_next (&gsi))
6696 {
6697 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6698 if (stmt_info)
6699 vect_determine_stmt_precisions (vinfo, stmt_info);
6700 }
6701 }
6702 }
6703 else
6704 {
6705 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
6706 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
6707 {
6708 basic_block bb = bb_vinfo->bbs[i];
6709 for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6710 {
6711 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6712 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6713 vect_determine_mask_precision (vinfo, stmt_info);
6714 }
6715 for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6716 {
6717 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
6718 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6719 vect_determine_mask_precision (vinfo, stmt_info);
6720 }
6721 }
6722 for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
6723 {
6724 for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
6725 !gsi_end_p (gsi); gsi_prev (&gsi))
6726 {
6727 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
6728 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6729 vect_determine_stmt_precisions (vinfo, stmt_info);
6730 }
6731 for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
6732 !gsi_end_p (gsi); gsi_next (&gsi))
6733 {
6734 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6735 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6736 vect_determine_stmt_precisions (vinfo, stmt_info);
6737 }
6738 }
6739 }
6740 }
6741
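/* A pattern recognition function: given a statement to start from, return
   the last statement of a replacement pattern sequence (or NULL if the
   statement does not match), storing the vector type of that statement's
   result in the tree pointed to by the third argument.  See
   vect_pattern_recog_1 for the full contract.  */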
6742 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
6743
6744 struct vect_recog_func
6745 {
6746 vect_recog_func_ptr fn;
6747 const char *name;
6748 };
6749
6750 /* Note that ordering matters - the first pattern matching on a stmt is
6751    taken which means usually the more complex one needs to precede the
6752    less complex ones (widen_sum only after dot_prod or sad for example).  */
6753 static vect_recog_func vect_vect_recog_func_ptrs[] = {
6754 { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
6755 { vect_recog_bit_insert_pattern, "bit_insert" },
6756 { vect_recog_abd_pattern, "abd" },
6757 { vect_recog_over_widening_pattern, "over_widening" },
6758 /* Must come after over_widening, which narrows the shift as much as
6759 possible beforehand. */
6760 { vect_recog_average_pattern, "average" },
6761 { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
6762 { vect_recog_mulhs_pattern, "mult_high" },
6763 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
6764 { vect_recog_widen_mult_pattern, "widen_mult" },
6765 { vect_recog_dot_prod_pattern, "dot_prod" },
6766 { vect_recog_sad_pattern, "sad" },
6767 { vect_recog_widen_sum_pattern, "widen_sum" },
6768 { vect_recog_pow_pattern, "pow" },
6769 { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" },
6770 { vect_recog_ctz_ffs_pattern, "ctz_ffs" },
6771 { vect_recog_widen_shift_pattern, "widen_shift" },
6772 { vect_recog_rotate_pattern, "rotate" },
6773 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
6774 { vect_recog_divmod_pattern, "divmod" },
6775 { vect_recog_mult_pattern, "mult" },
6776 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
6777 { vect_recog_bool_pattern, "bool" },
6778 /* This must come before mask conversion, and includes the parts
6779 of mask conversion that are needed for gather and scatter
6780 internal functions. */
6781 { vect_recog_gather_scatter_pattern, "gather_scatter" },
6782 { vect_recog_mask_conversion_pattern, "mask_conversion" },
6783 { vect_recog_widen_plus_pattern, "widen_plus" },
6784 { vect_recog_widen_minus_pattern, "widen_minus" },
6785 { vect_recog_widen_abd_pattern, "widen_abd" },
6786 /* These must come after the double widening ones. */
6787 };
6788
6789 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
6790
6791 /* Mark statements that are involved in a pattern. */
6792
6793 void
6794 vect_mark_pattern_stmts (vec_info *vinfo,
6795 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
6796 tree pattern_vectype)
6797 {
6798 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
6799 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
6800
6801 gimple *orig_pattern_stmt = NULL;
6802 if (is_pattern_stmt_p (orig_stmt_info))
6803 {
6804 /* We're replacing a statement in an existing pattern definition
6805 sequence. */
6806 orig_pattern_stmt = orig_stmt_info->stmt;
6807 if (dump_enabled_p ())
6808 dump_printf_loc (MSG_NOTE, vect_location,
6809 "replacing earlier pattern %G", orig_pattern_stmt);
6810
6811 /* To keep the book-keeping simple, just swap the lhs of the
6812 old and new statements, so that the old one has a valid but
6813 unused lhs. */
6814 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
6815 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
6816 gimple_set_lhs (pattern_stmt, old_lhs);
6817
6818 if (dump_enabled_p ())
6819 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
6820
6821 /* Switch to the statement that ORIG replaces. */
6822 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
6823
6824 /* We shouldn't be replacing the main pattern statement. */
6825 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
6826 != orig_pattern_stmt);
6827 }
6828
6829 if (def_seq)
6830 for (gimple_stmt_iterator si = gsi_start (def_seq);
6831 !gsi_end_p (si); gsi_next (&si))
6832 {
6833 if (dump_enabled_p ())
6834 dump_printf_loc (MSG_NOTE, vect_location,
6835 "extra pattern stmt: %G", gsi_stmt (si));
6836 stmt_vec_info pattern_stmt_info
6837 = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
6838 orig_stmt_info, pattern_vectype);
6839 	/* Stmts in the def sequence are not vectorizable cycle or
6840 	   induction defs; instead they should all be vect_internal_def
6841 	   feeding the main pattern stmt, which retains this def type.  */
6842 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
6843 }
6844
6845 if (orig_pattern_stmt)
6846 {
6847 vect_init_pattern_stmt (vinfo, pattern_stmt,
6848 orig_stmt_info, pattern_vectype);
6849
6850 /* Insert all the new pattern statements before the original one. */
6851 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
6852 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
6853 orig_def_seq);
6854 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
6855 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
6856
6857 /* Remove the pattern statement that this new pattern replaces. */
6858 gsi_remove (&gsi, false);
6859 }
6860 else
6861 vect_set_pattern_stmt (vinfo,
6862 pattern_stmt, orig_stmt_info, pattern_vectype);
6863
6864 /* Transfer reduction path info to the pattern. */
6865 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
6866 {
6867 gimple_match_op op;
6868 if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
6869 gcc_unreachable ();
6870 tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
6871       /* Search the pattern def sequence and the main pattern stmt.  Note
6872 	 that we may have inserted everything into a containing pattern def
6873 	 sequence, so the following is a bit awkward.  */
6874 gimple_stmt_iterator si;
6875 gimple *s;
6876 if (def_seq)
6877 {
6878 si = gsi_start (def_seq);
6879 s = gsi_stmt (si);
6880 gsi_next (&si);
6881 }
6882 else
6883 {
6884 si = gsi_none ();
6885 s = pattern_stmt;
6886 }
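      /* Walk the def sequence followed by the main pattern statement,
	 updating STMT_VINFO_REDUC_IDX whenever LOOKFOR appears as an
	 operand and then continuing the search from that statement's
	 lhs.  */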
6887 do
6888 {
6889 bool found = false;
6890 if (gimple_extract_op (s, &op))
6891 for (unsigned i = 0; i < op.num_ops; ++i)
6892 if (op.ops[i] == lookfor)
6893 {
6894 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
6895 lookfor = gimple_get_lhs (s);
6896 found = true;
6897 break;
6898 }
6899 if (s == pattern_stmt)
6900 {
6901 if (!found && dump_enabled_p ())
6902 dump_printf_loc (MSG_NOTE, vect_location,
6903 "failed to update reduction index.\n");
6904 break;
6905 }
6906 if (gsi_end_p (si))
6907 s = pattern_stmt;
6908 else
6909 {
6910 s = gsi_stmt (si);
6911 if (s == pattern_stmt)
6912 /* Found the end inside a bigger pattern def seq. */
6913 si = gsi_none ();
6914 else
6915 gsi_next (&si);
6916 }
6917 } while (1);
6918 }
6919 }
6920
6921 /* Function vect_pattern_recog_1
6922
6923 Input:
6924 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
6925 computation pattern.
6926 STMT_INFO: A stmt from which the pattern search should start.
6927
6928 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
6929 a sequence of statements that has the same functionality and can be
6930 used to replace STMT_INFO. It returns the last statement in the sequence
6931 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
6932 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
6933 statement, having first checked that the target supports the new operation
6934 in that type.
6935
6936 This function also does some bookkeeping, as explained in the documentation
6937    for vect_pattern_recog.  */
6938
6939 static void
6940 vect_pattern_recog_1 (vec_info *vinfo,
6941 vect_recog_func *recog_func, stmt_vec_info stmt_info)
6942 {
6943 gimple *pattern_stmt;
6944 loop_vec_info loop_vinfo;
6945 tree pattern_vectype;
6946
6947 /* If this statement has already been replaced with pattern statements,
6948 leave the original statement alone, since the first match wins.
6949 Instead try to match against the definition statements that feed
6950 the main pattern statement. */
6951 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
6952 {
6953 gimple_stmt_iterator gsi;
6954 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
6955 !gsi_end_p (gsi); gsi_next (&gsi))
6956 vect_pattern_recog_1 (vinfo, recog_func,
6957 vinfo->lookup_stmt (gsi_stmt (gsi)));
6958 return;
6959 }
6960
6961 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
6962 pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
6963 if (!pattern_stmt)
6964 {
6965 /* Clear any half-formed pattern definition sequence. */
6966 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
6967 return;
6968 }
6969
6970 loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6971
6972 /* Found a vectorizable pattern. */
6973 if (dump_enabled_p ())
6974 dump_printf_loc (MSG_NOTE, vect_location,
6975 "%s pattern recognized: %G",
6976 recog_func->name, pattern_stmt);
6977
6978 /* Mark the stmts that are involved in the pattern. */
6979 vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
6980
6981 /* Patterns cannot be vectorized using SLP, because they change the order of
6982 computation. */
6983 if (loop_vinfo)
6984 {
6985 unsigned ix, ix2;
6986 stmt_vec_info *elem_ptr;
6987 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
6988 elem_ptr, *elem_ptr == stmt_info);
6989 }
6990 }
6991
6992
6993 /* Function vect_pattern_recog
6994
6995 Input:
6996      VINFO - a vec_info describing the loop or basic blocks in which we want
6997      to look for computation idioms.
6998
6999 Output - for each computation idiom that is detected we create a new stmt
7000 that provides the same functionality and that can be vectorized. We
7001      also record some information in the stmt_vec_info of the relevant
7002 stmts, as explained below:
7003
7004 At the entry to this function we have the following stmts, with the
7005 following initial value in the STMT_VINFO fields:
7006
7007 stmt in_pattern_p related_stmt vec_stmt
7008 S1: a_i = .... - - -
7009 S2: a_2 = ..use(a_i).. - - -
7010 S3: a_1 = ..use(a_2).. - - -
7011 S4: a_0 = ..use(a_1).. - - -
7012 S5: ... = ..use(a_0).. - - -
7013
7014 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
7015 represented by a single stmt. We then:
7016 - create a new stmt S6 equivalent to the pattern (the stmt is not
7017 inserted into the code)
7018 - fill in the STMT_VINFO fields as follows:
7019
7020 in_pattern_p related_stmt vec_stmt
7021 S1: a_i = .... - - -
7022 S2: a_2 = ..use(a_i).. - - -
7023 S3: a_1 = ..use(a_2).. - - -
7024 S4: a_0 = ..use(a_1).. true S6 -
7025 '---> S6: a_new = .... - S4 -
7026 S5: ... = ..use(a_0).. - - -
7027
7028 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
7029 to each other through the RELATED_STMT field).
7030
7031 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
7032 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
7033 remain irrelevant unless used by stmts other than S4.
7034
7035 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
7036 (because they are marked as irrelevant). It will vectorize S6, and record
7037 a pointer to the new vector stmt VS6 from S6 (as usual).
7038 S4 will be skipped, and S5 will be vectorized as usual:
7039
7040 in_pattern_p related_stmt vec_stmt
7041 S1: a_i = .... - - -
7042 S2: a_2 = ..use(a_i).. - - -
7043 S3: a_1 = ..use(a_2).. - - -
7044 > VS6: va_new = .... - - -
7045 S4: a_0 = ..use(a_1).. true S6 VS6
7046 '---> S6: a_new = .... - S4 VS6
7047 > VS5: ... = ..vuse(va_new).. - - -
7048 S5: ... = ..use(a_0).. - - -
7049
7050 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
7051 elsewhere), and we'll end up with:
7052
7053 VS6: va_new = ....
7054 VS5: ... = ..vuse(va_new)..
7055
7056    In the case of more than one pattern statement, e.g., widen-mult with an
7057 intermediate type:
7058
7059 S1 a_t = ;
7060 S2 a_T = (TYPE) a_t;
7061 '--> S3: a_it = (interm_type) a_t;
7062 S4 prod_T = a_T * CONST;
7063 '--> S5: prod_T' = a_it w* CONST;
7064
7065 there may be other users of a_T outside the pattern. In that case S2 will
7066 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
7067 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
7068 be recorded in S3. */
7069
7070 void
7071 vect_pattern_recog (vec_info *vinfo)
7072 {
7073 class loop *loop;
7074 basic_block *bbs;
7075 unsigned int nbbs;
7076 gimple_stmt_iterator si;
7077 unsigned int i, j;
7078
7079 vect_determine_precisions (vinfo);
7080
7081 DUMP_VECT_SCOPE ("vect_pattern_recog");
7082
7083 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
7084 {
7085 loop = LOOP_VINFO_LOOP (loop_vinfo);
7086 bbs = LOOP_VINFO_BBS (loop_vinfo);
7087 nbbs = loop->num_nodes;
7088
7089 /* Scan through the loop stmts, applying the pattern recognition
7090 functions starting at each stmt visited: */
7091 for (i = 0; i < nbbs; i++)
7092 {
7093 basic_block bb = bbs[i];
7094 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
7095 {
7096 if (is_gimple_debug (gsi_stmt (si)))
7097 continue;
7098 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
7099 /* Scan over all generic vect_recog_xxx_pattern functions. */
7100 for (j = 0; j < NUM_PATTERNS; j++)
7101 vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
7102 stmt_info);
7103 }
7104 }
7105 }
7106 else
7107 {
7108 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
7109 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
7110 for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
7111 !gsi_end_p (gsi); gsi_next (&gsi))
7112 {
7113 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
7114 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
7115 continue;
7116
7117 /* Scan over all generic vect_recog_xxx_pattern functions. */
7118 for (j = 0; j < NUM_PATTERNS; j++)
7119 vect_pattern_recog_1 (vinfo,
7120 &vect_vect_recog_func_ptrs[j], stmt_info);
7121 }
7122 }
7123
7124 /* After this no more add_stmt calls are allowed. */
7125 vinfo->stmt_vec_info_ro = true;
7126 }
7127
7128 /* Build a GIMPLE_ASSIGN or GIMPLE_CALL with the tree_code or
7129    internal_fn contained in CH, respectively.  */
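/* For instance, a tree_code such as PLUS_EXPR yields the assignment
   LHS = OP0 + OP1, whereas an internal function such as IFN_AVG_FLOOR
   yields the call LHS = .AVG_FLOOR (OP0, OP1).  */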
7130 gimple *
7131 vect_gimple_build (tree lhs, code_helper ch, tree op0, tree op1)
7132 {
7133 gcc_assert (op0 != NULL_TREE);
7134 if (ch.is_tree_code ())
7135 return gimple_build_assign (lhs, (tree_code) ch, op0, op1);
7136
7137 gcc_assert (ch.is_internal_fn ());
7138 gimple* stmt = gimple_build_call_internal (as_internal_fn ((combined_fn) ch),
7139 op1 == NULL_TREE ? 1 : 2,
7140 op0, op1);
7141 gimple_call_set_lhs (stmt, lhs);
7142 return stmt;
7143 }