1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
49
50 /* Return true if we have a useful VR_RANGE range for VAR, storing it
51 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
52
53 static bool
54 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
55 {
56 value_range_kind vr_type = get_range_info (var, min_value, max_value);
57 wide_int nonzero = get_nonzero_bits (var);
58 signop sgn = TYPE_SIGN (TREE_TYPE (var));
59 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
60 nonzero, sgn) == VR_RANGE)
61 {
62 if (dump_enabled_p ())
63 {
64 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
65 dump_printf (MSG_NOTE, " has range [");
66 dump_hex (MSG_NOTE, *min_value);
67 dump_printf (MSG_NOTE, ", ");
68 dump_hex (MSG_NOTE, *max_value);
69 dump_printf (MSG_NOTE, "]\n");
70 }
71 return true;
72 }
73 else
74 {
75 if (dump_enabled_p ())
76 {
77 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
78 dump_printf (MSG_NOTE, " has no range info\n");
79 }
80 return false;
81 }
82 }
83
84 /* Report that we've found an instance of pattern NAME in
85 statement STMT. */
86
87 static void
88 vect_pattern_detected (const char *name, gimple *stmt)
89 {
90 if (dump_enabled_p ())
91 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
92 }
93
94 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
95 return the pattern statement's stmt_vec_info. Set its vector type to
96 VECTYPE if it doesn't have one already. */
97
98 static stmt_vec_info
99 vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
100 tree vectype)
101 {
102 vec_info *vinfo = orig_stmt_info->vinfo;
103 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
104 if (pattern_stmt_info == NULL)
105 pattern_stmt_info = orig_stmt_info->vinfo->add_stmt (pattern_stmt);
106 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
107
108 pattern_stmt_info->pattern_stmt_p = true;
109 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
110 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
111 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
112 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
113 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
114 return pattern_stmt_info;
115 }
116
117 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
118 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
119 have one already. */
120
121 static void
122 vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
123 tree vectype)
124 {
125 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
126 STMT_VINFO_RELATED_STMT (orig_stmt_info)
127 = vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
128 }
129
130 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
131 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
132 be different from the vector type of the final pattern statement. */
133
134 static inline void
135 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
136 tree vectype = NULL_TREE)
137 {
138 vec_info *vinfo = stmt_info->vinfo;
139 if (vectype)
140 {
141 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
142 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
143 }
144 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
145 new_stmt);
146 }
147
148 /* The caller wants to perform new operations on vect_external variable
149 VAR, so that the result of the operations would also be vect_external.
150 Return the edge on which the operations can be performed, if one exists.
151 Return null if the operations should instead be treated as part of
152 the pattern that needs them. */
153
154 static edge
155 vect_get_external_def_edge (vec_info *vinfo, tree var)
156 {
157 edge e = NULL;
158 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
159 {
160 e = loop_preheader_edge (loop_vinfo->loop);
161 if (!SSA_NAME_IS_DEFAULT_DEF (var))
162 {
163 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
164 if (bb == NULL
165 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
166 e = NULL;
167 }
168 }
169 return e;
170 }
171
172 /* Return true if the target supports a vector version of CODE,
173 where CODE is known to map to a direct optab. ITYPE specifies
174 the type of (some of) the scalar inputs and OTYPE specifies the
175 type of the scalar result.
176
177 If CODE allows the inputs and outputs to have different type
178 (such as for WIDEN_SUM_EXPR), it is the input mode rather
179 than the output mode that determines the appropriate target pattern.
180 Operand 0 of the target pattern then specifies the mode that the output
181 must have.
182
183 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
184 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
185 is nonnull. */
186
187 static bool
188 vect_supportable_direct_optab_p (tree otype, tree_code code,
189 tree itype, tree *vecotype_out,
190 tree *vecitype_out = NULL)
191 {
192 tree vecitype = get_vectype_for_scalar_type (itype);
193 if (!vecitype)
194 return false;
195
196 tree vecotype = get_vectype_for_scalar_type (otype);
197 if (!vecotype)
198 return false;
199
200 optab optab = optab_for_tree_code (code, vecitype, optab_default);
201 if (!optab)
202 return false;
203
204 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
205 if (icode == CODE_FOR_nothing
206 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
207 return false;
208
209 *vecotype_out = vecotype;
210 if (vecitype_out)
211 *vecitype_out = vecitype;
212 return true;
213 }
214
215 /* Round bit precision PRECISION up to a full element. */
216
217 static unsigned int
218 vect_element_precision (unsigned int precision)
219 {
220 precision = 1 << ceil_log2 (precision);
221 return MAX (precision, BITS_PER_UNIT);
222 }
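/* For example (illustrative values only): a precision of 5 rounds up to
   1 << ceil_log2 (5) = 8, a precision of 17 rounds up to 32, and a
   precision of 1 rounds up to BITS_PER_UNIT (8 bits on typical targets).  */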
223
224 /* If OP is defined by a statement that's being considered for vectorization,
225 return information about that statement, otherwise return NULL. */
226
227 static stmt_vec_info
228 vect_get_internal_def (vec_info *vinfo, tree op)
229 {
230 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
231 if (def_stmt_info
232 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
233 return def_stmt_info;
234 return NULL;
235 }
236
237 /* Check whether NAME, an ssa-name used in STMT_VINFO,
238 is a result of a type promotion, such that:
239 DEF_STMT: NAME = NOP (name0)
240 If CHECK_SIGN is TRUE, check that either both types are signed or both are
241 unsigned. */
242
243 static bool
244 type_conversion_p (tree name, stmt_vec_info stmt_vinfo, bool check_sign,
245 tree *orig_type, gimple **def_stmt, bool *promotion)
246 {
247 tree type = TREE_TYPE (name);
248 tree oprnd0;
249 enum vect_def_type dt;
250
251 stmt_vec_info def_stmt_info;
252 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, &def_stmt_info,
253 def_stmt))
254 return false;
255
256 if (dt != vect_internal_def
257 && dt != vect_external_def && dt != vect_constant_def)
258 return false;
259
260 if (!*def_stmt)
261 return false;
262
263 if (!is_gimple_assign (*def_stmt))
264 return false;
265
266 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
267 return false;
268
269 oprnd0 = gimple_assign_rhs1 (*def_stmt);
270
271 *orig_type = TREE_TYPE (oprnd0);
272 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
273 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
274 return false;
275
276 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
277 *promotion = true;
278 else
279 *promotion = false;
280
281 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
282 return false;
283
284 return true;
285 }
286
287 /* Holds information about an input operand after some sign changes
288 and type promotions have been peeled away. */
289 struct vect_unpromoted_value {
290 vect_unpromoted_value ();
291
292 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
293
294 /* The value obtained after peeling away zero or more casts. */
295 tree op;
296
297 /* The type of OP. */
298 tree type;
299
300 /* The definition type of OP. */
301 vect_def_type dt;
302
303 /* If OP is the result of peeling at least one cast, and if the cast
304 of OP itself is a vectorizable statement, CASTER identifies that
305 statement, otherwise it is null. */
306 stmt_vec_info caster;
307 };
308
309 inline vect_unpromoted_value::vect_unpromoted_value ()
310 : op (NULL_TREE),
311 type (NULL_TREE),
312 dt (vect_uninitialized_def),
313 caster (NULL)
314 {
315 }
316
317 /* Set the operand to OP_IN, its definition type to DT_IN, and the
318 statement that casts it to CASTER_IN. */
319
320 inline void
321 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
322 stmt_vec_info caster_in)
323 {
324 op = op_in;
325 type = TREE_TYPE (op);
326 dt = dt_in;
327 caster = caster_in;
328 }
329
330 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
331 to reach some vectorizable inner operand OP', continuing as long as it
332 is possible to convert OP' back to OP using a possible sign change
333 followed by a possible promotion P. Return this OP', or null if OP is
334 not a vectorizable SSA name. If there is a promotion P, describe its
335 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
336 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
337 have more than one user.
338
339 A successful return means that it is possible to go from OP' to OP
340 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
341 whereas the cast from UNPROM to OP might be a promotion, a sign
342 change, or a nop.
343
344 E.g. say we have:
345
346 signed short *ptr = ...;
347 signed short C = *ptr;
348 unsigned short B = (unsigned short) C; // sign change
349 signed int A = (signed int) B; // unsigned promotion
350 ...possible other uses of A...
351 unsigned int OP = (unsigned int) A; // sign change
352
353 In this case it's possible to go directly from C to OP using:
354
355 OP = (unsigned int) (unsigned short) C;
356 +------------+ +--------------+
357 promotion sign change
358
359 so OP' would be C. The input to the promotion is B, so UNPROM
360 would describe B. */
361
362 static tree
363 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
364 vect_unpromoted_value *unprom,
365 bool *single_use_p = NULL)
366 {
367 tree res = NULL_TREE;
368 tree op_type = TREE_TYPE (op);
369 unsigned int orig_precision = TYPE_PRECISION (op_type);
370 stmt_vec_info caster = NULL;
371 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
372 {
373 /* See whether OP is simple enough to vectorize. */
374 stmt_vec_info def_stmt_info;
375 gimple *def_stmt;
376 vect_def_type dt;
377 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
378 break;
379
380 /* If OP is the input of a demotion, skip over it to see whether
381 OP is itself the result of a promotion. If so, the combined
382 effect of the promotion and the demotion might fit the required
383 pattern, otherwise neither operation fits.
384
385 This copes with cases such as the result of an arithmetic
386 operation being truncated before being stored, and where that
387 arithmetic operation has been recognized as an over-widened one. */
388 if (TYPE_PRECISION (op_type) <= orig_precision)
389 {
390 /* Use OP as the UNPROM described above if we haven't yet
391 found a promotion, or if using the new input preserves the
392 sign of the previous promotion. */
393 if (!res
394 || TYPE_PRECISION (unprom->type) == orig_precision
395 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
396 unprom->set_op (op, dt, caster);
397 /* Stop if we've already seen a promotion and if this
398 conversion does more than change the sign. */
399 else if (TYPE_PRECISION (op_type)
400 != TYPE_PRECISION (unprom->type))
401 break;
402
403 /* The sequence now extends to OP. */
404 res = op;
405 }
406
407 /* See whether OP is defined by a cast. Record it as CASTER if
408 the cast is potentially vectorizable. */
409 if (!def_stmt)
410 break;
411 caster = def_stmt_info;
412
413 /* Ignore pattern statements, since we don't link uses for them. */
414 if (caster
415 && single_use_p
416 && !STMT_VINFO_RELATED_STMT (caster)
417 && !has_single_use (res))
418 *single_use_p = false;
419
420 gassign *assign = dyn_cast <gassign *> (def_stmt);
421 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
422 break;
423
424 /* Continue with the input to the cast. */
425 op = gimple_assign_rhs1 (def_stmt);
426 op_type = TREE_TYPE (op);
427 }
428 return res;
429 }
430
431 /* OP is an integer operand to an operation that returns TYPE, and we
432 want to treat the operation as a widening one. So far we can treat
433 it as widening from *COMMON_TYPE.
434
435 Return true if OP is suitable for such a widening operation,
436 either widening from *COMMON_TYPE or from some supertype of it.
437 Update *COMMON_TYPE to the supertype in the latter case.
438
439 SHIFT_P is true if OP is a shift amount. */
440
441 static bool
442 vect_joust_widened_integer (tree type, bool shift_p, tree op,
443 tree *common_type)
444 {
445 /* Calculate the minimum precision required by OP, without changing
446 the sign of either operand. */
447 unsigned int precision;
448 if (shift_p)
449 {
450 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
451 return false;
452 precision = TREE_INT_CST_LOW (op);
453 }
454 else
455 {
456 precision = wi::min_precision (wi::to_widest (op),
457 TYPE_SIGN (*common_type));
458 if (precision * 2 > TYPE_PRECISION (type))
459 return false;
460 }
461
462 /* If OP requires a wider type, switch to that type. The checks
463 above ensure that this is still narrower than the result. */
464 precision = vect_element_precision (precision);
465 if (TYPE_PRECISION (*common_type) < precision)
466 *common_type = build_nonstandard_integer_type
467 (precision, TYPE_UNSIGNED (*common_type));
468 return true;
469 }
470
471 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
472    is narrower than TYPE, storing the supertype in *COMMON_TYPE if so.  */
473
474 static bool
475 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
476 {
477 if (types_compatible_p (*common_type, new_type))
478 return true;
479
480 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
481 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
482 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
483 return true;
484
485 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
486 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
487 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
488 {
489 *common_type = new_type;
490 return true;
491 }
492
493 /* We have mismatched signs, with the signed type being
494 no wider than the unsigned type. In this case we need
495 a wider signed type. */
496 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
497 TYPE_PRECISION (new_type));
498 precision *= 2;
499 if (precision * 2 > TYPE_PRECISION (type))
500 return false;
501
502 *common_type = build_nonstandard_integer_type (precision, false);
503 return true;
504 }
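/* For example (types chosen purely for illustration): if *COMMON_TYPE is
   'unsigned char' and NEW_TYPE is 'signed char', neither type can hold all
   values of the other, so the mismatched-sign case applies: the common
   precision of 8 is doubled to 16 and, provided TYPE is at least 32 bits
   wide, *COMMON_TYPE becomes a 16-bit signed type.  */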
505
506 /* Check whether STMT_INFO can be viewed as a tree of integer operations
507 in which each node either performs CODE or WIDENED_CODE, and where
508 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
509 specifies the maximum number of leaf operands. SHIFT_P says whether
510 CODE and WIDENED_CODE are some sort of shift.
511
512 If STMT_INFO is such a tree, return the number of leaf operands
513 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
514 to a type that (a) is narrower than the result of STMT_INFO and
515 (b) can hold all leaf operand values.
516
517 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
518 exists. */
519
520 static unsigned int
521 vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
522 tree_code widened_code, bool shift_p,
523 unsigned int max_nops,
524 vect_unpromoted_value *unprom, tree *common_type)
525 {
526 /* Check for an integer operation with the right code. */
527 vec_info *vinfo = stmt_info->vinfo;
528 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
529 if (!assign)
530 return 0;
531
532 tree_code rhs_code = gimple_assign_rhs_code (assign);
533 if (rhs_code != code && rhs_code != widened_code)
534 return 0;
535
536 tree type = gimple_expr_type (assign);
537 if (!INTEGRAL_TYPE_P (type))
538 return 0;
539
540 /* Assume that both operands will be leaf operands. */
541 max_nops -= 2;
542
543 /* Check the operands. */
544 unsigned int next_op = 0;
545 for (unsigned int i = 0; i < 2; ++i)
546 {
547 vect_unpromoted_value *this_unprom = &unprom[next_op];
548 unsigned int nops = 1;
549 tree op = gimple_op (assign, i + 1);
550 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
551 {
552 /* We already have a common type from earlier operands.
553 Update it to account for OP. */
554 this_unprom->set_op (op, vect_constant_def);
555 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
556 return 0;
557 }
558 else
559 {
560 /* Only allow shifts by constants. */
561 if (shift_p && i == 1)
562 return 0;
563
564 if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
565 this_unprom))
566 return 0;
567
568 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
569 {
570 /* The operand isn't widened. If STMT_INFO has the code
571 for an unwidened operation, recursively check whether
572 this operand is a node of the tree. */
573 if (rhs_code != code
574 || max_nops == 0
575 || this_unprom->dt != vect_internal_def)
576 return 0;
577
578 /* Give back the leaf slot allocated above now that we're
579 not treating this as a leaf operand. */
580 max_nops += 1;
581
582 /* Recursively process the definition of the operand. */
583 stmt_vec_info def_stmt_info
584 = vinfo->lookup_def (this_unprom->op);
585 nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
586 shift_p, max_nops, this_unprom,
587 common_type);
588 if (nops == 0)
589 return 0;
590
591 max_nops -= nops;
592 }
593 else
594 {
595 /* Make sure that the operand is narrower than the result. */
596 if (TYPE_PRECISION (this_unprom->type) * 2
597 > TYPE_PRECISION (type))
598 return 0;
599
600 /* Update COMMON_TYPE for the new operand. */
601 if (i == 0)
602 *common_type = this_unprom->type;
603 else if (!vect_joust_widened_type (type, this_unprom->type,
604 common_type))
605 return 0;
606 }
607 }
608 next_op += nops;
609 }
610 return next_op;
611 }
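/* For example (an illustrative sketch, assuming every statement below is
   part of the region being vectorized and 'a', 'b', 'c', 'd' have type
   'short'):

       int t1 = (int) a + (int) b;
       int t2 = (int) c + (int) d;
       int res = t1 + t2;

   Calling this function on the statement defining 'res' with
   CODE == PLUS_EXPR and MAX_NOPS == 4 returns 4, describes a, b, c and d
   in UNPROM[0..3], and sets *COMMON_TYPE to 'short'.  */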
612
613 /* Helper to return a new temporary of type TYPE for a pattern, defined by STMT.  If STMT
614 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
615
616 static tree
617 vect_recog_temp_ssa_var (tree type, gimple *stmt)
618 {
619 return make_temp_ssa_name (type, stmt, "patt");
620 }
621
622 /* STMT2_INFO describes a type conversion that could be split into STMT1
623 followed by a version of STMT2_INFO that takes NEW_RHS as its first
624 input. Try to do this using pattern statements, returning true on
625 success. */
626
627 static bool
628 vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
629 gimple *stmt1, tree vectype)
630 {
631 if (is_pattern_stmt_p (stmt2_info))
632 {
633 /* STMT2_INFO is part of a pattern. Get the statement to which
634 the pattern is attached. */
635 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
636 vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);
637
638 if (dump_enabled_p ())
639 dump_printf_loc (MSG_NOTE, vect_location,
640 "Splitting pattern statement: %G", stmt2_info->stmt);
641
642 /* Since STMT2_INFO is a pattern statement, we can change it
643 in-situ without worrying about changing the code for the
644 containing block. */
645 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
646
647 if (dump_enabled_p ())
648 {
649 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
650 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
651 stmt2_info->stmt);
652 }
653
654 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
655 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
656 /* STMT2_INFO is the actual pattern statement. Add STMT1
657 to the end of the definition sequence. */
658 gimple_seq_add_stmt_without_update (def_seq, stmt1);
659 else
660 {
661 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
662 before it. */
663 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
664 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
665 }
666 return true;
667 }
668 else
669 {
670 /* STMT2_INFO doesn't yet have a pattern. Try to create a
671 two-statement pattern now. */
672 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
673 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
674 tree lhs_vectype = get_vectype_for_scalar_type (lhs_type);
675 if (!lhs_vectype)
676 return false;
677
678 if (dump_enabled_p ())
679 dump_printf_loc (MSG_NOTE, vect_location,
680 "Splitting statement: %G", stmt2_info->stmt);
681
682 /* Add STMT1 as a singleton pattern definition sequence. */
683 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
684 vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
685 gimple_seq_add_stmt_without_update (def_seq, stmt1);
686
687 /* Build the second of the two pattern statements. */
688 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
689 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
690 vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);
691
692 if (dump_enabled_p ())
693 {
694 dump_printf_loc (MSG_NOTE, vect_location,
695 "into pattern statements: %G", stmt1);
696 dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
697 }
698
699 return true;
700 }
701 }
702
703 /* Convert UNPROM to TYPE and return the result, adding new statements
704 to STMT_INFO's pattern definition statements if no better way is
705 available. VECTYPE is the vector form of TYPE. */
706
707 static tree
708 vect_convert_input (stmt_vec_info stmt_info, tree type,
709 vect_unpromoted_value *unprom, tree vectype)
710 {
711 /* Check for a no-op conversion. */
712 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
713 return unprom->op;
714
715 /* Allow the caller to create constant vect_unpromoted_values. */
716 if (TREE_CODE (unprom->op) == INTEGER_CST)
717 return wide_int_to_tree (type, wi::to_widest (unprom->op));
718
719 /* See if we can reuse an existing result. */
720 if (unprom->caster)
721 {
722 tree lhs = gimple_get_lhs (unprom->caster->stmt);
723 if (types_compatible_p (TREE_TYPE (lhs), type))
724 return lhs;
725 }
726
727 /* We need a new conversion statement. */
728 tree new_op = vect_recog_temp_ssa_var (type, NULL);
729 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, unprom->op);
730
731 /* If the operation is the input to a vectorizable cast, try splitting
732 that cast into two, taking the required result as a mid-way point. */
733 if (unprom->caster)
734 {
735 tree lhs = gimple_get_lhs (unprom->caster->stmt);
736 if (TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION (type)
737 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type)
738 && (TYPE_UNSIGNED (unprom->type) || !TYPE_UNSIGNED (type))
739 && vect_split_statement (unprom->caster, new_op, new_stmt, vectype))
740 return new_op;
741 }
742
743 /* If OP is an external value, see if we can insert the new statement
744 on an incoming edge. */
745 if (unprom->dt == vect_external_def)
746 if (edge e = vect_get_external_def_edge (stmt_info->vinfo, unprom->op))
747 {
748 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
749 gcc_assert (!new_bb);
750 return new_op;
751 }
752
753 /* As a (common) last resort, add the statement to the pattern itself. */
754 append_pattern_def_seq (stmt_info, new_stmt, vectype);
755 return new_op;
756 }
757
758 /* Invoke vect_convert_input for N elements of UNPROM and store the
759 result in the corresponding elements of RESULT. */
760
761 static void
762 vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
763 tree *result, tree type, vect_unpromoted_value *unprom,
764 tree vectype)
765 {
766 for (unsigned int i = 0; i < n; ++i)
767 {
768 unsigned int j;
769 for (j = 0; j < i; ++j)
770 if (unprom[j].op == unprom[i].op)
771 break;
772 if (j < i)
773 result[i] = result[j];
774 else
775 result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
776 }
777 }
778
779 /* The caller has created a (possibly empty) sequence of pattern definition
780 statements followed by a single statement PATTERN_STMT. Cast the result
781 of this final statement to TYPE. If a new statement is needed, add
782 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
783 and return the new statement, otherwise return PATTERN_STMT as-is.
784 VECITYPE is the vector form of PATTERN_STMT's result type. */
785
786 static gimple *
787 vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
788 tree vecitype)
789 {
790 tree lhs = gimple_get_lhs (pattern_stmt);
791 if (!types_compatible_p (type, TREE_TYPE (lhs)))
792 {
793 append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
794 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
795 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
796 }
797 return pattern_stmt;
798 }
799
800 /* Return true if STMT_VINFO describes a reduction for which reassociation
801    is allowed.  If STMT_VINFO is part of a group, assume that it's part of
802 a reduction chain and optimistically assume that all statements
803 except the last allow reassociation. */
804
805 static bool
806 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
807 {
808 return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
809 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
810 : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
811 }
812
813 /* As above, but also require it to have code CODE and to be a reduction
814 in the outermost loop. When returning true, store the operands in
815 *OP0_OUT and *OP1_OUT. */
816
817 static bool
818 vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
819 tree *op0_out, tree *op1_out)
820 {
821 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
822 if (!loop_info)
823 return false;
824
825 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
826 if (!assign || gimple_assign_rhs_code (assign) != code)
827 return false;
828
829 /* We don't allow changing the order of the computation in the inner-loop
830 when doing outer-loop vectorization. */
831 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
832 if (loop && nested_in_vect_loop_p (loop, stmt_info))
833 return false;
834
835 if (!vect_reassociating_reduction_p (stmt_info))
836 return false;
837
838 *op0_out = gimple_assign_rhs1 (assign);
839 *op1_out = gimple_assign_rhs2 (assign);
840 return true;
841 }
842
843 /* Function vect_recog_dot_prod_pattern
844
845 Try to find the following pattern:
846
847 type x_t, y_t;
848 TYPE1 prod;
849 TYPE2 sum = init;
850 loop:
851 sum_0 = phi <init, sum_1>
852 S1 x_t = ...
853 S2 y_t = ...
854 S3 x_T = (TYPE1) x_t;
855 S4 y_T = (TYPE1) y_t;
856 S5 prod = x_T * y_T;
857 [S6 prod = (TYPE2) prod; #optional]
858 S7 sum_1 = prod + sum_0;
859
860 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
861    same size as 'TYPE1' or bigger.  This is a special case of a reduction
862 computation.
863
864 Input:
865
866 * STMT_VINFO: The stmt from which the pattern search begins. In the
867 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
868 will be detected.
869
870 Output:
871
872 * TYPE_OUT: The type of the output of this pattern.
873
874 * Return value: A new stmt that will be used to replace the sequence of
875 stmts that constitute the pattern. In this case it will be:
876 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
877
878 Note: The dot-prod idiom is a widening reduction pattern that is
879 vectorized without preserving all the intermediate results. It
880 produces only N/2 (widened) results (by summing up pairs of
881 intermediate results) rather than all N results. Therefore, we
882 cannot allow this pattern when we want to get all the results and in
883 the correct order (as is the case when this computation is in an
884    inner-loop nested in an outer-loop that is being vectorized).  */
885
886 static gimple *
887 vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
888 {
889 tree oprnd0, oprnd1;
890 gimple *last_stmt = stmt_vinfo->stmt;
891 vec_info *vinfo = stmt_vinfo->vinfo;
892 tree type, half_type;
893 gimple *pattern_stmt;
894 tree var;
895
896 /* Look for the following pattern
897 DX = (TYPE1) X;
898 DY = (TYPE1) Y;
899 DPROD = DX * DY;
900 DDPROD = (TYPE2) DPROD;
901 sum_1 = DDPROD + sum_0;
902 In which
903 - DX is double the size of X
904 - DY is double the size of Y
905 - DX, DY, DPROD all have the same type
906      - sum is the same size as DPROD or bigger
907 - sum has been recognized as a reduction variable.
908
909 This is equivalent to:
910 DPROD = X w* Y; #widen mult
911 sum_1 = DPROD w+ sum_0; #widen summation
912 or
913 DPROD = X w* Y; #widen mult
914 sum_1 = DPROD + sum_0; #summation
915 */
916
917 /* Starting from LAST_STMT, follow the defs of its uses in search
918 of the above pattern. */
919
920 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
921 &oprnd0, &oprnd1))
922 return NULL;
923
924 type = gimple_expr_type (last_stmt);
925
926 vect_unpromoted_value unprom_mult;
927 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
928
929 /* So far so good. Since last_stmt was detected as a (summation) reduction,
930 we know that oprnd1 is the reduction variable (defined by a loop-header
931 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
932    Left to check that oprnd0 is defined by a (widen_)mult_expr.  */
933 if (!oprnd0)
934 return NULL;
935
936 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
937 if (!mult_vinfo)
938 return NULL;
939
940 /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
941 inside the loop (in case we are analyzing an outer-loop). */
942 vect_unpromoted_value unprom0[2];
943 if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
944 false, 2, unprom0, &half_type))
945 return NULL;
946
947 /* If there are two widening operations, make sure they agree on
948 the sign of the extension. */
949 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
950 && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
951 return NULL;
952
953 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
954
955 tree half_vectype;
956 if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
957 type_out, &half_vectype))
958 return NULL;
959
960 /* Get the inputs in the appropriate types. */
961 tree mult_oprnd[2];
962 vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
963 unprom0, half_vectype);
964
965 var = vect_recog_temp_ssa_var (type, NULL);
966 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
967 mult_oprnd[0], mult_oprnd[1], oprnd1);
968
969 return pattern_stmt;
970 }
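/* A minimal example of the kind of source loop this recognizer targets
   (an illustrative sketch only; the names and types are hypothetical):

       short x[N], y[N];
       int sum = 0;
       for (int i = 0; i < N; i++)
         sum += x[i] * y[i];

   The multiplication is performed on values widened from 'short' and the
   result is accumulated in 'int', matching the S3-S7 shape above, so the
   loop body can be rewritten as a single DOT_PROD_EXPR when the target
   provides a dot-product optab for this element size.  */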
971
972
973 /* Function vect_recog_sad_pattern
974
975 Try to find the following Sum of Absolute Difference (SAD) pattern:
976
977 type x_t, y_t;
978 signed TYPE1 diff, abs_diff;
979 TYPE2 sum = init;
980 loop:
981 sum_0 = phi <init, sum_1>
982 S1 x_t = ...
983 S2 y_t = ...
984 S3 x_T = (TYPE1) x_t;
985 S4 y_T = (TYPE1) y_t;
986 S5 diff = x_T - y_T;
987 S6 abs_diff = ABS_EXPR <diff>;
988 [S7 abs_diff = (TYPE2) abs_diff; #optional]
989 S8 sum_1 = abs_diff + sum_0;
990
991 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
992    same size as 'TYPE1' or bigger.  This is a special case of a reduction
993 computation.
994
995 Input:
996
997 * STMT_VINFO: The stmt from which the pattern search begins. In the
998 example, when this function is called with S8, the pattern
999 {S3,S4,S5,S6,S7,S8} will be detected.
1000
1001 Output:
1002
1003 * TYPE_OUT: The type of the output of this pattern.
1004
1005 * Return value: A new stmt that will be used to replace the sequence of
1006 stmts that constitute the pattern. In this case it will be:
1007 SAD_EXPR <x_t, y_t, sum_0>
1008 */
1009
1010 static gimple *
1011 vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1012 {
1013 gimple *last_stmt = stmt_vinfo->stmt;
1014 vec_info *vinfo = stmt_vinfo->vinfo;
1015 tree half_type;
1016
1017 /* Look for the following pattern
1018 DX = (TYPE1) X;
1019 DY = (TYPE1) Y;
1020 DDIFF = DX - DY;
1021 DAD = ABS_EXPR <DDIFF>;
1022          [DAD = (TYPE2) DAD;   #optional]
1023 sum_1 = DAD + sum_0;
1024 In which
1025 - DX is at least double the size of X
1026 - DY is at least double the size of Y
1027 - DX, DY, DDIFF, DAD all have the same type
1028      - sum is the same size as DAD or bigger
1029 - sum has been recognized as a reduction variable.
1030
1031 This is equivalent to:
1032 DDIFF = X w- Y; #widen sub
1033 DAD = ABS_EXPR <DDIFF>;
1034 sum_1 = DAD w+ sum_0; #widen summation
1035 or
1036 DDIFF = X w- Y; #widen sub
1037 DAD = ABS_EXPR <DDIFF>;
1038 sum_1 = DAD + sum_0; #summation
1039 */
1040
1041 /* Starting from LAST_STMT, follow the defs of its uses in search
1042 of the above pattern. */
1043
1044 tree plus_oprnd0, plus_oprnd1;
1045 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1046 &plus_oprnd0, &plus_oprnd1))
1047 return NULL;
1048
1049 tree sum_type = gimple_expr_type (last_stmt);
1050
1051 /* Any non-truncating sequence of conversions is OK here, since
1052 with a successful match, the result of the ABS(U) is known to fit
1053 within the nonnegative range of the result type. (It cannot be the
1054 negative of the minimum signed value due to the range of the widening
1055 MINUS_EXPR.) */
1056 vect_unpromoted_value unprom_abs;
1057 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1058 &unprom_abs);
1059
1060 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1061 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1062 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1063 Then check that plus_oprnd0 is defined by an abs_expr. */
1064
1065 if (!plus_oprnd0)
1066 return NULL;
1067
1068 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1069 if (!abs_stmt_vinfo)
1070 return NULL;
1071
1072 /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
1073 inside the loop (in case we are analyzing an outer-loop). */
1074 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1075 if (!abs_stmt
1076 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1077 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1078 return NULL;
1079
1080 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1081 tree abs_type = TREE_TYPE (abs_oprnd);
1082 if (TYPE_UNSIGNED (abs_type))
1083 return NULL;
1084
1085 /* Peel off conversions from the ABS input. This can involve sign
1086 changes (e.g. from an unsigned subtraction to a signed ABS input)
1087 or signed promotion, but it can't include unsigned promotion.
1088 (Note that ABS of an unsigned promotion should have been folded
1089 away before now anyway.) */
1090 vect_unpromoted_value unprom_diff;
1091 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1092 &unprom_diff);
1093 if (!abs_oprnd)
1094 return NULL;
1095 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1096 && TYPE_UNSIGNED (unprom_diff.type))
1097 return NULL;
1098
1099 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1100 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1101 if (!diff_stmt_vinfo)
1102 return NULL;
1103
1104 /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
1105 inside the loop (in case we are analyzing an outer-loop). */
1106 vect_unpromoted_value unprom[2];
1107 if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
1108 false, 2, unprom, &half_type))
1109 return NULL;
1110
1111 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1112
1113 tree half_vectype;
1114 if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR, half_type,
1115 type_out, &half_vectype))
1116 return NULL;
1117
1118 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1119 tree sad_oprnd[2];
1120 vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
1121 unprom, half_vectype);
1122
1123 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1124 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1125 sad_oprnd[1], plus_oprnd1);
1126
1127 return pattern_stmt;
1128 }
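/* A minimal example of the kind of source loop this recognizer targets
   (an illustrative sketch only; the names and types are hypothetical):

       unsigned char x[N], y[N];
       int sum = 0;
       for (int i = 0; i < N; i++)
         sum += abs (x[i] - y[i]);

   The subtraction happens on values widened from 'unsigned char', the
   absolute value is taken in the wider signed type, and the result is
   accumulated in 'int', so the loop body can be rewritten as a single
   SAD_EXPR on targets that provide a suitable sum-of-absolute-differences
   optab.  */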
1129
1130 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1131 so that it can be treated as though it had the form:
1132
1133 A_TYPE a;
1134 B_TYPE b;
1135 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1136 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1137 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1138 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1139 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1140
1141 Try to replace the pattern with:
1142
1143 A_TYPE a;
1144 B_TYPE b;
1145 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1146 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1147 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1148 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1149
1150 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1151
1152 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1153 name of the pattern being matched, for dump purposes. */
1154
1155 static gimple *
1156 vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
1157 tree_code orig_code, tree_code wide_code,
1158 bool shift_p, const char *name)
1159 {
1160 gimple *last_stmt = last_stmt_info->stmt;
1161
1162 vect_unpromoted_value unprom[2];
1163 tree half_type;
1164 if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
1165 shift_p, 2, unprom, &half_type))
1166 return NULL;
1167
1168 /* Pattern detected. */
1169 vect_pattern_detected (name, last_stmt);
1170
1171 tree type = gimple_expr_type (last_stmt);
1172 tree itype = type;
1173 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1174 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1175 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1176 TYPE_UNSIGNED (half_type));
1177
1178 /* Check target support */
1179 tree vectype = get_vectype_for_scalar_type (half_type);
1180 tree vecitype = get_vectype_for_scalar_type (itype);
1181 enum tree_code dummy_code;
1182 int dummy_int;
1183 auto_vec<tree> dummy_vec;
1184 if (!vectype
1185 || !vecitype
1186 || !supportable_widening_operation (wide_code, last_stmt_info,
1187 vecitype, vectype,
1188 &dummy_code, &dummy_code,
1189 &dummy_int, &dummy_vec))
1190 return NULL;
1191
1192 *type_out = get_vectype_for_scalar_type (type);
1193 if (!*type_out)
1194 return NULL;
1195
1196 tree oprnd[2];
1197 vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);
1198
1199 tree var = vect_recog_temp_ssa_var (itype, NULL);
1200 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1201 oprnd[0], oprnd[1]);
1202
1203 return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
1204 }
1205
1206 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1207 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1208
1209 static gimple *
1210 vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1211 {
1212 return vect_recog_widen_op_pattern (last_stmt_info, type_out, MULT_EXPR,
1213 WIDEN_MULT_EXPR, false,
1214 "vect_recog_widen_mult_pattern");
1215 }
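/* A typical trigger for this pattern (an illustrative sketch; the names
   are hypothetical) is a multiplication whose operands are half the width
   of its result:

       short a[N], b[N];
       int c[N];
       for (int i = 0; i < N; i++)
         c[i] = a[i] * b[i];

   Both operands are widened from 'short', so the MULT_EXPR computed in
   'int' can be rewritten as a WIDEN_MULT_EXPR when the target supports a
   widening multiply for this mode.  */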
1216
1217 /* Function vect_recog_pow_pattern
1218
1219 Try to find the following pattern:
1220
1221 x = POW (y, N);
1222
1223 with POW being one of pow, powf, powi, powif and N being
1224 either 2 or 0.5.
1225
1226 Input:
1227
1228 * STMT_VINFO: The stmt from which the pattern search begins.
1229
1230 Output:
1231
1232 * TYPE_OUT: The type of the output of this pattern.
1233
1234 * Return value: A new stmt that will be used to replace the sequence of
1235 stmts that constitute the pattern. In this case it will be:
1236 x = x * x
1237 or
1238 x = sqrt (x)
1239 */
1240
1241 static gimple *
1242 vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1243 {
1244 gimple *last_stmt = stmt_vinfo->stmt;
1245 tree base, exp;
1246 gimple *stmt;
1247 tree var;
1248
1249 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1250 return NULL;
1251
1252 switch (gimple_call_combined_fn (last_stmt))
1253 {
1254 CASE_CFN_POW:
1255 CASE_CFN_POWI:
1256 break;
1257
1258 default:
1259 return NULL;
1260 }
1261
1262 base = gimple_call_arg (last_stmt, 0);
1263 exp = gimple_call_arg (last_stmt, 1);
1264 if (TREE_CODE (exp) != REAL_CST
1265 && TREE_CODE (exp) != INTEGER_CST)
1266 {
1267 if (flag_unsafe_math_optimizations
1268 && TREE_CODE (base) == REAL_CST
1269 && !gimple_call_internal_p (last_stmt))
1270 {
1271 combined_fn log_cfn;
1272 built_in_function exp_bfn;
1273 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1274 {
1275 case BUILT_IN_POW:
1276 log_cfn = CFN_BUILT_IN_LOG;
1277 exp_bfn = BUILT_IN_EXP;
1278 break;
1279 case BUILT_IN_POWF:
1280 log_cfn = CFN_BUILT_IN_LOGF;
1281 exp_bfn = BUILT_IN_EXPF;
1282 break;
1283 case BUILT_IN_POWL:
1284 log_cfn = CFN_BUILT_IN_LOGL;
1285 exp_bfn = BUILT_IN_EXPL;
1286 break;
1287 default:
1288 return NULL;
1289 }
1290 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1291 tree exp_decl = builtin_decl_implicit (exp_bfn);
1292 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1293 does that, but if C is a power of 2, we want to use
1294 exp2 (log2 (C) * x) in the non-vectorized version, but for
1295 vectorization we don't have vectorized exp2. */
1296 if (logc
1297 && TREE_CODE (logc) == REAL_CST
1298 && exp_decl
1299 && lookup_attribute ("omp declare simd",
1300 DECL_ATTRIBUTES (exp_decl)))
1301 {
1302 cgraph_node *node = cgraph_node::get_create (exp_decl);
1303 if (node->simd_clones == NULL)
1304 {
1305 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1306 || node->definition)
1307 return NULL;
1308 expand_simd_clones (node);
1309 if (node->simd_clones == NULL)
1310 return NULL;
1311 }
1312 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1313 if (!*type_out)
1314 return NULL;
1315 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1316 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1317 append_pattern_def_seq (stmt_vinfo, g);
1318 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1319 g = gimple_build_call (exp_decl, 1, def);
1320 gimple_call_set_lhs (g, res);
1321 return g;
1322 }
1323 }
1324
1325 return NULL;
1326 }
1327
1328 /* We now have a pow or powi builtin function call with a constant
1329 exponent. */
1330
1331 /* Catch squaring. */
1332 if ((tree_fits_shwi_p (exp)
1333 && tree_to_shwi (exp) == 2)
1334 || (TREE_CODE (exp) == REAL_CST
1335 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1336 {
1337 if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR,
1338 TREE_TYPE (base), type_out))
1339 return NULL;
1340
1341 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1342 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1343 return stmt;
1344 }
1345
1346 /* Catch square root. */
1347 if (TREE_CODE (exp) == REAL_CST
1348 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1349 {
1350 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1351 if (*type_out
1352 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1353 OPTIMIZE_FOR_SPEED))
1354 {
1355 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1356 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1357 gimple_call_set_lhs (stmt, var);
1358 gimple_call_set_nothrow (stmt, true);
1359 return stmt;
1360 }
1361 }
1362
1363 return NULL;
1364 }
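/* Illustrative examples of calls this pattern handles (names are
   hypothetical):

       y[i] = pow (x[i], 2.0);   becomes   y[i] = x[i] * x[i]
       y[i] = pow (x[i], 0.5);   becomes   y[i] = .SQRT (x[i])
                                 when IFN_SQRT is supported

   The pow (C, x) -> exp (log (C) * x) rewrite above additionally requires
   an "omp declare simd" clone of exp to be available.  */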
1365
1366
1367 /* Function vect_recog_widen_sum_pattern
1368
1369 Try to find the following pattern:
1370
1371 type x_t;
1372 TYPE x_T, sum = init;
1373 loop:
1374 sum_0 = phi <init, sum_1>
1375 S1 x_t = *p;
1376 S2 x_T = (TYPE) x_t;
1377 S3 sum_1 = x_T + sum_0;
1378
1379    where type 'TYPE' is at least double the size of type 'type', i.e. we're
1380 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1381 a special case of a reduction computation.
1382
1383 Input:
1384
1385 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1386 when this function is called with S3, the pattern {S2,S3} will be detected.
1387
1388 Output:
1389
1390 * TYPE_OUT: The type of the output of this pattern.
1391
1392 * Return value: A new stmt that will be used to replace the sequence of
1393 stmts that constitute the pattern. In this case it will be:
1394 WIDEN_SUM <x_t, sum_0>
1395
1396 Note: The widening-sum idiom is a widening reduction pattern that is
1397 vectorized without preserving all the intermediate results. It
1398 produces only N/2 (widened) results (by summing up pairs of
1399 intermediate results) rather than all N results. Therefore, we
1400 cannot allow this pattern when we want to get all the results and in
1401 the correct order (as is the case when this computation is in an
1402    inner-loop nested in an outer-loop that is being vectorized).  */
1403
1404 static gimple *
1405 vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1406 {
1407 gimple *last_stmt = stmt_vinfo->stmt;
1408 tree oprnd0, oprnd1;
1409 vec_info *vinfo = stmt_vinfo->vinfo;
1410 tree type;
1411 gimple *pattern_stmt;
1412 tree var;
1413
1414 /* Look for the following pattern
1415 DX = (TYPE) X;
1416 sum_1 = DX + sum_0;
1417 In which DX is at least double the size of X, and sum_1 has been
1418 recognized as a reduction variable.
1419 */
1420
1421 /* Starting from LAST_STMT, follow the defs of its uses in search
1422 of the above pattern. */
1423
1424 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1425 &oprnd0, &oprnd1))
1426 return NULL;
1427
1428 type = gimple_expr_type (last_stmt);
1429
1430 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1431 we know that oprnd1 is the reduction variable (defined by a loop-header
1432 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1433 Left to check that oprnd0 is defined by a cast from type 'type' to type
1434 'TYPE'. */
1435
1436 vect_unpromoted_value unprom0;
1437 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1438 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1439 return NULL;
1440
1441 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1442
1443 if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
1444 type_out))
1445 return NULL;
1446
1447 var = vect_recog_temp_ssa_var (type, NULL);
1448 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1449
1450 return pattern_stmt;
1451 }
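/* A minimal example of the kind of loop this recognizer targets (an
   illustrative sketch; the names are hypothetical):

       unsigned char x[N];
       int sum = 0;
       for (int i = 0; i < N; i++)
         sum += x[i];

   Each element is widened from 'unsigned char' before being added to the
   'int' accumulator, so the addition can be rewritten as a WIDEN_SUM_EXPR
   if the target provides a widening-sum optab for this element size.  */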
1452
1453 /* Recognize cases in which an operation is performed in one type WTYPE
1454 but could be done more efficiently in a narrower type NTYPE. For example,
1455 if we have:
1456
1457 ATYPE a; // narrower than NTYPE
1458 BTYPE b; // narrower than NTYPE
1459 WTYPE aw = (WTYPE) a;
1460 WTYPE bw = (WTYPE) b;
1461 WTYPE res = aw + bw; // only uses of aw and bw
1462
1463 then it would be more efficient to do:
1464
1465 NTYPE an = (NTYPE) a;
1466 NTYPE bn = (NTYPE) b;
1467 NTYPE resn = an + bn;
1468 WTYPE res = (WTYPE) resn;
1469
1470 Other situations include things like:
1471
1472 ATYPE a; // NTYPE or narrower
1473 WTYPE aw = (WTYPE) a;
1474 WTYPE res = aw + b;
1475
1476 when only "(NTYPE) res" is significant. In that case it's more efficient
1477 to truncate "b" and do the operation on NTYPE instead:
1478
1479 NTYPE an = (NTYPE) a;
1480 NTYPE bn = (NTYPE) b; // truncation
1481 NTYPE resn = an + bn;
1482 WTYPE res = (WTYPE) resn;
1483
1484 All users of "res" should then use "resn" instead, making the final
1485 statement dead (not marked as relevant). The final statement is still
1486 needed to maintain the type correctness of the IR.
1487
1488 vect_determine_precisions has already determined the minimum
1489    precision of the operation and the minimum precision required
1490 by users of the result. */
1491
1492 static gimple *
1493 vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1494 {
1495 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1496 if (!last_stmt)
1497 return NULL;
1498
1499 /* See whether we have found that this operation can be done on a
1500 narrower type without changing its semantics. */
1501 unsigned int new_precision = last_stmt_info->operation_precision;
1502 if (!new_precision)
1503 return NULL;
1504
1505 vec_info *vinfo = last_stmt_info->vinfo;
1506 tree lhs = gimple_assign_lhs (last_stmt);
1507 tree type = TREE_TYPE (lhs);
1508 tree_code code = gimple_assign_rhs_code (last_stmt);
1509
1510 /* Keep the first operand of a COND_EXPR as-is: only the other two
1511 operands are interesting. */
1512 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1513
1514 /* Check the operands. */
1515 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1516 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1517 unprom.quick_grow (nops);
1518 unsigned int min_precision = 0;
1519 bool single_use_p = false;
1520 for (unsigned int i = 0; i < nops; ++i)
1521 {
1522 tree op = gimple_op (last_stmt, first_op + i);
1523 if (TREE_CODE (op) == INTEGER_CST)
1524 unprom[i].set_op (op, vect_constant_def);
1525 else if (TREE_CODE (op) == SSA_NAME)
1526 {
1527 bool op_single_use_p = true;
1528 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1529 &op_single_use_p))
1530 return NULL;
1531 /* If:
1532
1533 (1) N bits of the result are needed;
1534 (2) all inputs are widened from M<N bits; and
1535 (3) one operand OP is a single-use SSA name
1536
1537 we can shift the M->N widening from OP to the output
1538 without changing the number or type of extensions involved.
1539 This then reduces the number of copies of STMT_INFO.
1540
1541 If instead of (3) more than one operand is a single-use SSA name,
1542 shifting the extension to the output is even more of a win.
1543
1544 If instead:
1545
1546 (1) N bits of the result are needed;
1547 (2) one operand OP2 is widened from M2<N bits;
1548 (3) another operand OP1 is widened from M1<M2 bits; and
1549 (4) both OP1 and OP2 are single-use
1550
1551 the choice is between:
1552
1553 (a) truncating OP2 to M1, doing the operation on M1,
1554 and then widening the result to N
1555
1556 (b) widening OP1 to M2, doing the operation on M2, and then
1557 widening the result to N
1558
1559 Both shift the M2->N widening of the inputs to the output.
1560 (a) additionally shifts the M1->M2 widening to the output;
1561 it requires fewer copies of STMT_INFO but requires an extra
1562 M2->M1 truncation.
1563
1564 Which is better will depend on the complexity and cost of
1565 STMT_INFO, which is hard to predict at this stage. However,
1566 a clear tie-breaker in favor of (b) is the fact that the
1567 truncation in (a) increases the length of the operation chain.
1568
1569 If instead of (4) only one of OP1 or OP2 is single-use,
1570 (b) is still a win over doing the operation in N bits:
1571 it still shifts the M2->N widening on the single-use operand
1572 to the output and reduces the number of STMT_INFO copies.
1573
1574 If neither operand is single-use then operating on fewer than
1575 N bits might lead to more extensions overall. Whether it does
1576 or not depends on global information about the vectorization
1577 region, and whether that's a good trade-off would again
1578 depend on the complexity and cost of the statements involved,
1579 as well as things like register pressure that are not normally
1580 modelled at this stage. We therefore ignore these cases
1581 and just optimize the clear single-use wins above.
1582
1583 Thus we take the maximum precision of the unpromoted operands
1584 and record whether any operand is single-use. */
1585 if (unprom[i].dt == vect_internal_def)
1586 {
1587 min_precision = MAX (min_precision,
1588 TYPE_PRECISION (unprom[i].type));
1589 single_use_p |= op_single_use_p;
1590 }
1591 }
1592 }
1593
1594 /* Although the operation could be done in operation_precision, we have
1595 to balance that against introducing extra truncations or extensions.
1596 Calculate the minimum precision that can be handled efficiently.
1597
1598 The loop above determined that the operation could be handled
1599 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1600 extension from the inputs to the output without introducing more
1601 instructions, and would reduce the number of instructions required
1602 for STMT_INFO itself.
1603
1604 vect_determine_precisions has also determined that the result only
1605 needs min_output_precision bits. Truncating by a factor of N times
1606 requires a tree of N - 1 instructions, so if TYPE is N times wider
1607 than min_output_precision, doing the operation in TYPE and truncating
1608 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1609 In contrast:
1610
1611 - truncating the input to a unary operation and doing the operation
1612 in the new type requires at most N - 1 + 1 = N instructions per
1613 output vector
1614
1615 - doing the same for a binary operation requires at most
1616 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1617
1618 Both unary and binary operations require fewer instructions than
1619 this if the operands were extended from a suitable truncated form.
1620 Thus there is usually nothing to lose by doing operations in
1621 min_output_precision bits, but there can be something to gain. */
1622 if (!single_use_p)
1623 min_precision = last_stmt_info->min_output_precision;
1624 else
1625 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1626
1627 /* Apply the minimum efficient precision we just calculated. */
1628 if (new_precision < min_precision)
1629 new_precision = min_precision;
1630 if (new_precision >= TYPE_PRECISION (type))
1631 return NULL;
1632
1633 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1634
1635 *type_out = get_vectype_for_scalar_type (type);
1636 if (!*type_out)
1637 return NULL;
1638
1639 /* We've found a viable pattern. Get the new type of the operation. */
1640 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1641 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1642
1643 /* We specifically don't check here whether the target supports the
1644 new operation, since it might be something that a later pattern
1645 wants to rewrite anyway. If targets have a minimum element size
1646 for some optabs, we should pattern-match smaller ops to larger ops
1647 where beneficial. */
1648 tree new_vectype = get_vectype_for_scalar_type (new_type);
1649 if (!new_vectype)
1650 return NULL;
1651
1652 if (dump_enabled_p ())
1653 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1654 type, new_type);
1655
1656 /* Calculate the rhs operands for an operation on NEW_TYPE. */
1657 tree ops[3] = {};
1658 for (unsigned int i = 1; i < first_op; ++i)
1659 ops[i - 1] = gimple_op (last_stmt, i);
1660 vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
1661 new_type, &unprom[0], new_vectype);
1662
1663 /* Use the operation to produce a result of type NEW_TYPE. */
1664 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1665 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1666 ops[0], ops[1], ops[2]);
1667 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1668
1669 if (dump_enabled_p ())
1670 dump_printf_loc (MSG_NOTE, vect_location,
1671 "created pattern stmt: %G", pattern_stmt);
1672
1673 pattern_stmt = vect_convert_output (last_stmt_info, type,
1674 pattern_stmt, new_vectype);
1675
1676 return pattern_stmt;
1677 }
1678
1679 /* Recognize the patterns:
1680
1681 ATYPE a; // narrower than TYPE
1682 BTYPE b; // narrower than TYPE
1683 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1684 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1685
1686 where only the bottom half of avg is used. Try to transform them into:
1687
1688 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1689 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1690
1691 followed by:
1692
1693 TYPE avg = (TYPE) avg';
1694
1695 where NTYPE is no wider than half of TYPE. Since only the bottom half
1696 of avg is used, all or part of the cast of avg' should become redundant. */
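
  /* Purely as an illustration (assuming 8-bit inputs and a 32-bit TYPE),
     the rounding variant:

       unsigned char a, b;
       int avg = ((int) a + (int) b + 1) >> 1;  // only low 8 bits used

     would become:

       unsigned char avg' = .AVG_CEIL (a, b);
       int avg = (int) avg';

     with the final conversion expected to become (partly) redundant, as
     noted above.  */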
1697
1698 static gimple *
1699 vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1700 {
1701 /* Check for a shift right by one bit. */
1702 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1703 vec_info *vinfo = last_stmt_info->vinfo;
1704 if (!last_stmt
1705 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1706 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1707 return NULL;
1708
1709 /* Check that the shift result is wider than the users of the
1710 result need (i.e. that narrowing would be a natural choice). */
1711 tree lhs = gimple_assign_lhs (last_stmt);
1712 tree type = TREE_TYPE (lhs);
1713 unsigned int target_precision
1714 = vect_element_precision (last_stmt_info->min_output_precision);
1715 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1716 return NULL;
1717
1718 /* Get the definition of the shift input. */
1719 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1720 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1721 if (!plus_stmt_info)
1722 return NULL;
1723
1724 /* Check whether the shift input can be seen as a tree of additions on
1725 2 or 3 widened inputs.
1726
1727 Note that the pattern should be a win even if the result of one or
1728 more additions is reused elsewhere: if the pattern matches, we'd be
1729 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1730 internal_fn ifn = IFN_AVG_FLOOR;
1731 vect_unpromoted_value unprom[3];
1732 tree new_type;
1733 unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
1734 PLUS_EXPR, false, 3,
1735 unprom, &new_type);
1736 if (nops == 0)
1737 return NULL;
1738 if (nops == 3)
1739 {
1740 /* Check that one operand is 1. */
1741 unsigned int i;
1742 for (i = 0; i < 3; ++i)
1743 if (integer_onep (unprom[i].op))
1744 break;
1745 if (i == 3)
1746 return NULL;
1747 /* Throw away the 1 operand and keep the other two. */
1748 if (i < 2)
1749 unprom[i] = unprom[2];
1750 ifn = IFN_AVG_CEIL;
1751 }
1752
1753 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
1754
1755 /* We know that:
1756
1757 (a) the operation can be viewed as:
1758
1759 TYPE widened0 = (TYPE) UNPROM[0];
1760 TYPE widened1 = (TYPE) UNPROM[1];
1761 TYPE tmp1 = widened0 + widened1 {+ 1};
1762 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
1763
1764 (b) the first two statements are equivalent to:
1765
1766 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
1767 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
1768
1769 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
1770 where sensible;
1771
1772 (d) all the operations can be performed correctly at twice the width of
1773 NEW_TYPE, due to the nature of the average operation; and
1774
1775 (e) users of the result of the right shift need only TARGET_PRECISION
1776 bits, where TARGET_PRECISION is no more than half of TYPE's
1777 precision.
1778
1779 Under these circumstances, the only situation in which NEW_TYPE
1780 could be narrower than TARGET_PRECISION is if widened0, widened1
1781 and an addition result are all used more than once. Thus we can
1782 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
1783 as "free", whereas widening the result of the average instruction
1784 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
1785 therefore better not to go narrower than TARGET_PRECISION. */
1786 if (TYPE_PRECISION (new_type) < target_precision)
1787 new_type = build_nonstandard_integer_type (target_precision,
1788 TYPE_UNSIGNED (new_type));
1789
1790 /* Check for target support. */
1791 tree new_vectype = get_vectype_for_scalar_type (new_type);
1792 if (!new_vectype
1793 || !direct_internal_fn_supported_p (ifn, new_vectype,
1794 OPTIMIZE_FOR_SPEED))
1795 return NULL;
1796
1797 /* The IR requires a valid vector type for the cast result, even though
1798 it's likely to be discarded. */
1799 *type_out = get_vectype_for_scalar_type (type);
1800 if (!*type_out)
1801 return NULL;
1802
1803 /* Generate the IFN_AVG* call. */
1804 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1805 tree new_ops[2];
1806 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
1807 unprom, new_vectype);
1808 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
1809 new_ops[1]);
1810 gimple_call_set_lhs (average_stmt, new_var);
1811 gimple_set_location (average_stmt, gimple_location (last_stmt));
1812
1813 if (dump_enabled_p ())
1814 dump_printf_loc (MSG_NOTE, vect_location,
1815 "created pattern stmt: %G", average_stmt);
1816
1817 return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
1818 }
1819
1820 /* Recognize cases in which the input to a cast is wider than its
1821 output, and the input is fed by a widening operation. Fold this
1822 by removing the unnecessary intermediate widening. E.g.:
1823
1824 unsigned char a;
1825 unsigned int b = (unsigned int) a;
1826 unsigned short c = (unsigned short) b;
1827
1828 -->
1829
1830 unsigned short c = (unsigned short) a;
1831
1832 Although this is rare in input IR, it is an expected side-effect
1833 of the over-widening pattern above.
1834
1835 This is also beneficial for integer-to-float conversions, if the
1836 widened integer has more bits than the float, and if the unwidened
1837 input doesn't. */
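
  /* A hypothetical instance of the integer-to-float case (assuming a
     64-bit unsigned long):

       unsigned int a;
       unsigned long b = a;
       float f = (float) b;

     -->

       float f = (float) a;

     since the intermediate widening adds nothing that the float
     conversion can use.  */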
1838
1839 static gimple *
1840 vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1841 {
1842 /* Check for a cast, including an integer-to-float conversion. */
1843 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1844 if (!last_stmt)
1845 return NULL;
1846 tree_code code = gimple_assign_rhs_code (last_stmt);
1847 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
1848 return NULL;
1849
1850 /* Make sure that the rhs is a scalar with a natural bitsize. */
1851 tree lhs = gimple_assign_lhs (last_stmt);
1852 if (!lhs)
1853 return NULL;
1854 tree lhs_type = TREE_TYPE (lhs);
1855 scalar_mode lhs_mode;
1856 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
1857 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
1858 return NULL;
1859
1860 /* Check for a narrowing operation (from a vector point of view). */
1861 tree rhs = gimple_assign_rhs1 (last_stmt);
1862 tree rhs_type = TREE_TYPE (rhs);
1863 if (!INTEGRAL_TYPE_P (rhs_type)
1864 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
1865 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
1866 return NULL;
1867
1868 /* Try to find an unpromoted input. */
1869 vec_info *vinfo = last_stmt_info->vinfo;
1870 vect_unpromoted_value unprom;
1871 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
1872 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
1873 return NULL;
1874
1875 /* If the bits above RHS_TYPE matter, make sure that they're the
1876 same when extending from UNPROM as they are when extending from RHS. */
1877 if (!INTEGRAL_TYPE_P (lhs_type)
1878 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
1879 return NULL;
1880
1881 /* We can get the same result by casting UNPROM directly, to avoid
1882 the unnecessary widening and narrowing. */
1883 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
1884
1885 *type_out = get_vectype_for_scalar_type (lhs_type);
1886 if (!*type_out)
1887 return NULL;
1888
1889 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1890 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
1891 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1892
1893 return pattern_stmt;
1894 }
1895
1896 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
1897 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
1898
1899 static gimple *
1900 vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1901 {
1902 return vect_recog_widen_op_pattern (last_stmt_info, type_out, LSHIFT_EXPR,
1903 WIDEN_LSHIFT_EXPR, true,
1904 "vect_recog_widen_shift_pattern");
1905 }
1906
1907 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
1908
1909 type a_t, b_t, c_t;
1910
1911 S0 a_t = b_t r<< c_t;
1912
1913 Input/Output:
1914
1915 * STMT_VINFO: The stmt from which the pattern search begins,
1916 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1917 with a sequence:
1918
1919 S1 d_t = -c_t;
1920 S2 e_t = d_t & (B - 1);
1921 S3 f_t = b_t << c_t;
1922 S4 g_t = b_t >> e_t;
1923 S0 a_t = f_t | g_t;
1924
1925 where B is element bitsize of type.
1926
1927 Output:
1928
1929 * TYPE_OUT: The type of the output of this pattern.
1930
1931 * Return value: A new stmt that will be used to replace the rotate
1932 S0 stmt. */
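
  /* In plain C terms, for an 8-bit unsigned element type the sequence
     above computes (sketch only):

       a_t = (b_t << c_t) | (b_t >> ((-c_t) & 7));

     where the & 7 mask keeps the right-shift amount in range and makes
     the c_t == 0 case well defined.  */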
1933
1934 static gimple *
1935 vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1936 {
1937 gimple *last_stmt = stmt_vinfo->stmt;
1938 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1939 gimple *pattern_stmt, *def_stmt;
1940 enum tree_code rhs_code;
1941 vec_info *vinfo = stmt_vinfo->vinfo;
1942 enum vect_def_type dt;
1943 optab optab1, optab2;
1944 edge ext_def = NULL;
1945
1946 if (!is_gimple_assign (last_stmt))
1947 return NULL;
1948
1949 rhs_code = gimple_assign_rhs_code (last_stmt);
1950 switch (rhs_code)
1951 {
1952 case LROTATE_EXPR:
1953 case RROTATE_EXPR:
1954 break;
1955 default:
1956 return NULL;
1957 }
1958
1959 lhs = gimple_assign_lhs (last_stmt);
1960 oprnd0 = gimple_assign_rhs1 (last_stmt);
1961 type = TREE_TYPE (oprnd0);
1962 oprnd1 = gimple_assign_rhs2 (last_stmt);
1963 if (TREE_CODE (oprnd0) != SSA_NAME
1964 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1965 || !INTEGRAL_TYPE_P (type)
1966 || !TYPE_UNSIGNED (type))
1967 return NULL;
1968
1969 stmt_vec_info def_stmt_info;
1970 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
1971 return NULL;
1972
1973 if (dt != vect_internal_def
1974 && dt != vect_constant_def
1975 && dt != vect_external_def)
1976 return NULL;
1977
1978 vectype = get_vectype_for_scalar_type (type);
1979 if (vectype == NULL_TREE)
1980 return NULL;
1981
1982 /* If vector/vector or vector/scalar rotate is supported by the target,
1983 don't do anything here. */
1984 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1985 if (optab1
1986 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1987 return NULL;
1988
1989 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
1990 {
1991 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1992 if (optab2
1993 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1994 return NULL;
1995 }
1996
1997 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1998 don't do anything here either. */
1999 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2000 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2001 if (!optab1
2002 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2003 || !optab2
2004 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2005 {
2006 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2007 return NULL;
2008 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2009 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2010 if (!optab1
2011 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2012 || !optab2
2013 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2014 return NULL;
2015 }
2016
2017 *type_out = vectype;
2018
2019 if (dt == vect_external_def
2020 && TREE_CODE (oprnd1) == SSA_NAME)
2021 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2022
2023 def = NULL_TREE;
2024 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2025 if (TREE_CODE (oprnd1) == INTEGER_CST
2026 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2027 def = oprnd1;
2028 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2029 {
2030 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2031 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2032 && TYPE_PRECISION (TREE_TYPE (rhs1))
2033 == TYPE_PRECISION (type))
2034 def = rhs1;
2035 }
2036
2037 if (def == NULL_TREE)
2038 {
2039 def = vect_recog_temp_ssa_var (type, NULL);
2040 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2041 if (ext_def)
2042 {
2043 basic_block new_bb
2044 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2045 gcc_assert (!new_bb);
2046 }
2047 else
2048 append_pattern_def_seq (stmt_vinfo, def_stmt);
2049 }
2050 stype = TREE_TYPE (def);
2051 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
2052
2053 if (TREE_CODE (def) == INTEGER_CST)
2054 {
2055 if (!tree_fits_uhwi_p (def)
2056 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2057 || integer_zerop (def))
2058 return NULL;
2059 def2 = build_int_cst (stype,
2060 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2061 }
2062 else
2063 {
2064 tree vecstype = get_vectype_for_scalar_type (stype);
2065
2066 if (vecstype == NULL_TREE)
2067 return NULL;
2068 def2 = vect_recog_temp_ssa_var (stype, NULL);
2069 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2070 if (ext_def)
2071 {
2072 basic_block new_bb
2073 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2074 gcc_assert (!new_bb);
2075 }
2076 else
2077 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2078
2079 def2 = vect_recog_temp_ssa_var (stype, NULL);
2080 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
2081 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2082 gimple_assign_lhs (def_stmt), mask);
2083 if (ext_def)
2084 {
2085 basic_block new_bb
2086 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2087 gcc_assert (!new_bb);
2088 }
2089 else
2090 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2091 }
2092
2093 var1 = vect_recog_temp_ssa_var (type, NULL);
2094 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2095 ? LSHIFT_EXPR : RSHIFT_EXPR,
2096 oprnd0, def);
2097 append_pattern_def_seq (stmt_vinfo, def_stmt);
2098
2099 var2 = vect_recog_temp_ssa_var (type, NULL);
2100 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2101 ? RSHIFT_EXPR : LSHIFT_EXPR,
2102 oprnd0, def2);
2103 append_pattern_def_seq (stmt_vinfo, def_stmt);
2104
2105 /* Pattern detected. */
2106 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2107
2108 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2109 var = vect_recog_temp_ssa_var (type, NULL);
2110 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2111
2112 return pattern_stmt;
2113 }
2114
2115 /* Detect a vector-by-vector shift pattern that wouldn't otherwise be
2116 vectorized:
2117
2118 type a_t;
2119 TYPE b_T, res_T;
2120
2121 S1 a_t = ;
2122 S2 b_T = ;
2123 S3 res_T = b_T op a_t;
2124
2125 where type 'TYPE' is a type with a different size than 'type',
2126 and op is <<, >> or rotate.
2127
2128 Also detect cases:
2129
2130 type a_t;
2131 TYPE b_T, c_T, res_T;
2132
2133 S0 c_T = ;
2134 S1 a_t = (type) c_T;
2135 S2 b_T = ;
2136 S3 res_T = b_T op a_t;
2137
2138 Input/Output:
2139
2140 * STMT_VINFO: The stmt from which the pattern search begins,
2141 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2142 with a shift/rotate which has the same type on both operands: in the
2143 second case just b_T op c_T, and in the first case with an added
2144 cast c_T = (TYPE) a_t in STMT_VINFO_PATTERN_DEF_SEQ.
2145
2146 Output:
2147
2148 * TYPE_OUT: The type of the output of this pattern.
2149
2150 * Return value: A new stmt that will be used to replace the shift/rotate
2151 S3 stmt. */
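
  /* As a small illustrative case (types chosen arbitrarily):

       unsigned char a_t;
       unsigned int b_T, res_T;

       res_T = b_T << a_t;

     the pattern first widens the shift amount to the type of b_T in
     STMT_VINFO_PATTERN_DEF_SEQ and then emits the shift with matching
     operand types.  */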
2152
2153 static gimple *
2154 vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
2155 tree *type_out)
2156 {
2157 gimple *last_stmt = stmt_vinfo->stmt;
2158 tree oprnd0, oprnd1, lhs, var;
2159 gimple *pattern_stmt;
2160 enum tree_code rhs_code;
2161 vec_info *vinfo = stmt_vinfo->vinfo;
2162
2163 if (!is_gimple_assign (last_stmt))
2164 return NULL;
2165
2166 rhs_code = gimple_assign_rhs_code (last_stmt);
2167 switch (rhs_code)
2168 {
2169 case LSHIFT_EXPR:
2170 case RSHIFT_EXPR:
2171 case LROTATE_EXPR:
2172 case RROTATE_EXPR:
2173 break;
2174 default:
2175 return NULL;
2176 }
2177
2178 lhs = gimple_assign_lhs (last_stmt);
2179 oprnd0 = gimple_assign_rhs1 (last_stmt);
2180 oprnd1 = gimple_assign_rhs2 (last_stmt);
2181 if (TREE_CODE (oprnd0) != SSA_NAME
2182 || TREE_CODE (oprnd1) != SSA_NAME
2183 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2184 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2185 || TYPE_PRECISION (TREE_TYPE (lhs))
2186 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2187 return NULL;
2188
2189 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2190 if (!def_vinfo)
2191 return NULL;
2192
2193 *type_out = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2194 if (*type_out == NULL_TREE)
2195 return NULL;
2196
2197 tree def = NULL_TREE;
2198 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2199 if (def_stmt && gimple_assign_cast_p (def_stmt))
2200 {
2201 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2202 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2203 && TYPE_PRECISION (TREE_TYPE (rhs1))
2204 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2205 {
2206 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2207 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2208 def = rhs1;
2209 else
2210 {
2211 tree mask
2212 = build_low_bits_mask (TREE_TYPE (rhs1),
2213 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2214 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2215 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2216 tree vecstype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2217 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2218 }
2219 }
2220 }
2221
2222 if (def == NULL_TREE)
2223 {
2224 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2225 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2226 append_pattern_def_seq (stmt_vinfo, def_stmt);
2227 }
2228
2229 /* Pattern detected. */
2230 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2231
2232 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2233 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2234 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2235
2236 return pattern_stmt;
2237 }
2238
2239 /* Return true iff the target has a vector optab implementing the operation
2240 CODE on type VECTYPE. */
2241
2242 static bool
2243 target_has_vecop_for_code (tree_code code, tree vectype)
2244 {
2245 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2246 return voptab
2247 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2248 }
2249
2250 /* Verify that the target has optabs of VECTYPE to perform all the steps
2251 needed by the multiplication-by-immediate synthesis algorithm described by
2252 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2253 present. Return true iff the target supports all the steps. */
2254
2255 static bool
2256 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2257 tree vectype, bool synth_shift_p)
2258 {
2259 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2260 return false;
2261
2262 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2263 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2264
2265 if (var == negate_variant
2266 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2267 return false;
2268
2269 /* If we must synthesize shifts with additions make sure that vector
2270 addition is available. */
2271 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2272 return false;
2273
2274 for (int i = 1; i < alg->ops; i++)
2275 {
2276 switch (alg->op[i])
2277 {
2278 case alg_shift:
2279 break;
2280 case alg_add_t_m2:
2281 case alg_add_t2_m:
2282 case alg_add_factor:
2283 if (!supports_vplus)
2284 return false;
2285 break;
2286 case alg_sub_t_m2:
2287 case alg_sub_t2_m:
2288 case alg_sub_factor:
2289 if (!supports_vminus)
2290 return false;
2291 break;
2292 case alg_unknown:
2293 case alg_m:
2294 case alg_zero:
2295 case alg_impossible:
2296 return false;
2297 default:
2298 gcc_unreachable ();
2299 }
2300 }
2301
2302 return true;
2303 }
2304
2305 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2306 putting the final result in DEST. Append all statements but the last into
2307 VINFO. Return the last statement. */
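
  /* For example (illustrative only), a shift by 3 becomes the chain:

       t1 = op + op;     // op << 1
       t2 = t1 + t1;     // op << 2
       dest = t2 + t2;   // op << 3

     with all but the last statement appended to the pattern sequence
     and the last one returned to the caller.  */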
2308
2309 static gimple *
2310 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2311 stmt_vec_info vinfo)
2312 {
2313 HOST_WIDE_INT i;
2314 tree itype = TREE_TYPE (op);
2315 tree prev_res = op;
2316 gcc_assert (amnt >= 0);
2317 for (i = 0; i < amnt; i++)
2318 {
2319 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2320 : dest;
2321 gimple *stmt
2322 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2323 prev_res = tmp_var;
2324 if (i < amnt - 1)
2325 append_pattern_def_seq (vinfo, stmt);
2326 else
2327 return stmt;
2328 }
2329 gcc_unreachable ();
2330 return NULL;
2331 }
2332
2333 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2334 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2335 the process if necessary. Append the resulting assignment statements
2336 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2337 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2338 left shifts using additions. */
2339
2340 static tree
2341 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2342 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2343 {
2344 if (integer_zerop (op2)
2345 && (code == LSHIFT_EXPR
2346 || code == PLUS_EXPR))
2347 {
2348 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2349 return op1;
2350 }
2351
2352 gimple *stmt;
2353 tree itype = TREE_TYPE (op1);
2354 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2355
2356 if (code == LSHIFT_EXPR
2357 && synth_shift_p)
2358 {
2359 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2360 stmt_vinfo);
2361 append_pattern_def_seq (stmt_vinfo, stmt);
2362 return tmp_var;
2363 }
2364
2365 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2366 append_pattern_def_seq (stmt_vinfo, stmt);
2367 return tmp_var;
2368 }
2369
2370 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2371 and simple arithmetic operations to be vectorized. Record the statements
2372 produced in STMT_VINFO and return the last statement in the sequence or
2373 NULL if it's not possible to synthesize such a multiplication.
2374 This function mirrors the behavior of expand_mult_const in expmed.c but
2375 works on tree-ssa form. */
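
  /* As a sketch of the kind of sequence this can produce (the exact
     steps are whatever choose_mult_variant selects), a multiplication
     by 10 might become:

       t1 = op << 2;     // op * 4
       t2 = t1 + op;     // op * 5
       res = t2 << 1;    // op * 10

     built purely from shifts and additions.  */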
2376
2377 static gimple *
2378 vect_synth_mult_by_constant (tree op, tree val,
2379 stmt_vec_info stmt_vinfo)
2380 {
2381 tree itype = TREE_TYPE (op);
2382 machine_mode mode = TYPE_MODE (itype);
2383 struct algorithm alg;
2384 mult_variant variant;
2385 if (!tree_fits_shwi_p (val))
2386 return NULL;
2387
2388 /* Multiplication synthesis by shifts, adds and subs can introduce
2389 signed overflow where the original operation didn't. Perform the
2390 operations on an unsigned type and cast back to avoid this.
2391 In the future we may want to relax this for synthesis algorithms
2392 that we can prove do not cause unexpected overflow. */
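  /* For example, synthesizing x * 7 as (x << 3) - x can overflow in the
     intermediate x << 3 even when x * 7 itself is representable, hence
     the switch to an unsigned type below.  */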
2393 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2394
2395 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2396
2397 /* Targets that don't support vector shifts but support vector additions
2398 can synthesize shifts that way. */
2399 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2400
2401 HOST_WIDE_INT hwval = tree_to_shwi (val);
2402 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2403 The vectorizer's benefit analysis will decide whether it's beneficial
2404 to do this. */
2405 bool possible = choose_mult_variant (mode, hwval, &alg,
2406 &variant, MAX_COST);
2407 if (!possible)
2408 return NULL;
2409
2410 tree vectype = get_vectype_for_scalar_type (multtype);
2411
2412 if (!vectype
2413 || !target_supports_mult_synth_alg (&alg, variant,
2414 vectype, synth_shift_p))
2415 return NULL;
2416
2417 tree accumulator;
2418
2419 /* Clear out the sequence of statements so we can populate it below. */
2420 gimple *stmt = NULL;
2421
2422 if (cast_to_unsigned_p)
2423 {
2424 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2425 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2426 append_pattern_def_seq (stmt_vinfo, stmt);
2427 op = tmp_op;
2428 }
2429
2430 if (alg.op[0] == alg_zero)
2431 accumulator = build_int_cst (multtype, 0);
2432 else
2433 accumulator = op;
2434
2435 bool needs_fixup = (variant == negate_variant)
2436 || (variant == add_variant);
2437
2438 for (int i = 1; i < alg.ops; i++)
2439 {
2440 tree shft_log = build_int_cst (multtype, alg.log[i]);
2441 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2442 tree tmp_var = NULL_TREE;
2443
2444 switch (alg.op[i])
2445 {
2446 case alg_shift:
2447 if (synth_shift_p)
2448 stmt
2449 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2450 stmt_vinfo);
2451 else
2452 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2453 shft_log);
2454 break;
2455 case alg_add_t_m2:
2456 tmp_var
2457 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2458 stmt_vinfo, synth_shift_p);
2459 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2460 tmp_var);
2461 break;
2462 case alg_sub_t_m2:
2463 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2464 shft_log, stmt_vinfo,
2465 synth_shift_p);
2466 /* In some algorithms the first step involves zeroing the
2467 accumulator. If subtracting from such an accumulator
2468 just emit the negation directly. */
2469 if (integer_zerop (accumulator))
2470 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2471 else
2472 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2473 tmp_var);
2474 break;
2475 case alg_add_t2_m:
2476 tmp_var
2477 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2478 stmt_vinfo, synth_shift_p);
2479 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2480 break;
2481 case alg_sub_t2_m:
2482 tmp_var
2483 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2484 stmt_vinfo, synth_shift_p);
2485 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2486 break;
2487 case alg_add_factor:
2488 tmp_var
2489 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2490 stmt_vinfo, synth_shift_p);
2491 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2492 tmp_var);
2493 break;
2494 case alg_sub_factor:
2495 tmp_var
2496 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2497 stmt_vinfo, synth_shift_p);
2498 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2499 accumulator);
2500 break;
2501 default:
2502 gcc_unreachable ();
2503 }
2504 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2505 but rather return it directly. */
2506
2507 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2508 append_pattern_def_seq (stmt_vinfo, stmt);
2509 accumulator = accum_tmp;
2510 }
2511 if (variant == negate_variant)
2512 {
2513 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2514 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2515 accumulator = accum_tmp;
2516 if (cast_to_unsigned_p)
2517 append_pattern_def_seq (stmt_vinfo, stmt);
2518 }
2519 else if (variant == add_variant)
2520 {
2521 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2522 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2523 accumulator = accum_tmp;
2524 if (cast_to_unsigned_p)
2525 append_pattern_def_seq (stmt_vinfo, stmt);
2526 }
2527 /* Move back to a signed type if needed. */
2528 if (cast_to_unsigned_p)
2529 {
2530 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2531 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2532 }
2533
2534 return stmt;
2535 }
2536
2537 /* Detect multiplication by a constant and convert it into a sequence of
2538 shifts, additions, subtractions and negations. We reuse the
2539 choose_mult_variant algorithms from expmed.c.
2540
2541 Input/Output:
2542
2543 STMT_VINFO: The stmt from which the pattern search begins,
2544 i.e. the mult stmt.
2545
2546 Output:
2547
2548 * TYPE_OUT: The type of the output of this pattern.
2549
2550 * Return value: A new stmt that will be used to replace
2551 the multiplication. */
2552
2553 static gimple *
2554 vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2555 {
2556 gimple *last_stmt = stmt_vinfo->stmt;
2557 tree oprnd0, oprnd1, vectype, itype;
2558 gimple *pattern_stmt;
2559
2560 if (!is_gimple_assign (last_stmt))
2561 return NULL;
2562
2563 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2564 return NULL;
2565
2566 oprnd0 = gimple_assign_rhs1 (last_stmt);
2567 oprnd1 = gimple_assign_rhs2 (last_stmt);
2568 itype = TREE_TYPE (oprnd0);
2569
2570 if (TREE_CODE (oprnd0) != SSA_NAME
2571 || TREE_CODE (oprnd1) != INTEGER_CST
2572 || !INTEGRAL_TYPE_P (itype)
2573 || !type_has_mode_precision_p (itype))
2574 return NULL;
2575
2576 vectype = get_vectype_for_scalar_type (itype);
2577 if (vectype == NULL_TREE)
2578 return NULL;
2579
2580 /* If the target can handle vectorized multiplication natively,
2581 don't attempt to optimize this. */
2582 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2583 if (mul_optab != unknown_optab)
2584 {
2585 machine_mode vec_mode = TYPE_MODE (vectype);
2586 int icode = (int) optab_handler (mul_optab, vec_mode);
2587 if (icode != CODE_FOR_nothing)
2588 return NULL;
2589 }
2590
2591 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2592 if (!pattern_stmt)
2593 return NULL;
2594
2595 /* Pattern detected. */
2596 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
2597
2598 *type_out = vectype;
2599
2600 return pattern_stmt;
2601 }
2602
2603 /* Detect a signed division by a constant that wouldn't otherwise
2604 be vectorized:
2605
2606 type a_t, b_t;
2607
2608 S1 a_t = b_t / N;
2609
2610 where type 'type' is an integral type and N is a constant.
2611
2612 Similarly handle modulo by a constant:
2613
2614 S4 a_t = b_t % N;
2615
2616 Input/Output:
2617
2618 * STMT_VINFO: The stmt from which the pattern search begins,
2619 i.e. the division stmt. If N is a power-of-two constant and the
2620 type is signed, S1 is replaced by:
2621 S3 y_t = b_t < 0 ? N - 1 : 0;
2622 S2 x_t = b_t + y_t;
2623 S1' a_t = x_t >> log2 (N);
2624
2625 Under the same conditions, S4 is replaced by (where the *_T
2626 temporaries have unsigned type):
2627 S9 y_T = b_t < 0 ? -1U : 0U;
2628 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2629 S7 z_t = (type) z_T;
2630 S6 w_t = b_t + z_t;
2631 S5 x_t = w_t & (N - 1);
2632 S4' a_t = x_t - z_t;
2633
2634 Output:
2635
2636 * TYPE_OUT: The type of the output of this pattern.
2637
2638 * Return value: A new stmt that will be used to replace the division
2639 S1 or modulo S4 stmt. */
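
  /* A worked instance of the power-of-two case (illustrative only),
     with N == 4 and b_t == -7:

       y_t = -7 < 0 ? 3 : 0;   // 3
       x_t = -7 + 3;           // -4
       a_t = -4 >> 2;          // -1

     i.e. the C semantics of truncation towards zero, whereas a plain
     arithmetic shift of -7 by 2 would give -2.  */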
2640
2641 static gimple *
2642 vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2643 {
2644 gimple *last_stmt = stmt_vinfo->stmt;
2645 tree oprnd0, oprnd1, vectype, itype, cond;
2646 gimple *pattern_stmt, *def_stmt;
2647 enum tree_code rhs_code;
2648 optab optab;
2649 tree q;
2650 int dummy_int, prec;
2651
2652 if (!is_gimple_assign (last_stmt))
2653 return NULL;
2654
2655 rhs_code = gimple_assign_rhs_code (last_stmt);
2656 switch (rhs_code)
2657 {
2658 case TRUNC_DIV_EXPR:
2659 case EXACT_DIV_EXPR:
2660 case TRUNC_MOD_EXPR:
2661 break;
2662 default:
2663 return NULL;
2664 }
2665
2666 oprnd0 = gimple_assign_rhs1 (last_stmt);
2667 oprnd1 = gimple_assign_rhs2 (last_stmt);
2668 itype = TREE_TYPE (oprnd0);
2669 if (TREE_CODE (oprnd0) != SSA_NAME
2670 || TREE_CODE (oprnd1) != INTEGER_CST
2671 || TREE_CODE (itype) != INTEGER_TYPE
2672 || !type_has_mode_precision_p (itype))
2673 return NULL;
2674
2675 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2676 vectype = get_vectype_for_scalar_type (itype);
2677 if (vectype == NULL_TREE)
2678 return NULL;
2679
2680 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
2681 {
2682 /* If the target can handle vectorized division or modulo natively,
2683 don't attempt to optimize this, since native division is likely
2684 to give smaller code. */
2685 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2686 if (optab != unknown_optab)
2687 {
2688 machine_mode vec_mode = TYPE_MODE (vectype);
2689 int icode = (int) optab_handler (optab, vec_mode);
2690 if (icode != CODE_FOR_nothing)
2691 return NULL;
2692 }
2693 }
2694
2695 prec = TYPE_PRECISION (itype);
2696 if (integer_pow2p (oprnd1))
2697 {
2698 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2699 return NULL;
2700
2701 /* Pattern detected. */
2702 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
2703
2704 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2705 build_int_cst (itype, 0));
2706 if (rhs_code == TRUNC_DIV_EXPR
2707 || rhs_code == EXACT_DIV_EXPR)
2708 {
2709 tree var = vect_recog_temp_ssa_var (itype, NULL);
2710 tree shift;
2711 def_stmt
2712 = gimple_build_assign (var, COND_EXPR, cond,
2713 fold_build2 (MINUS_EXPR, itype, oprnd1,
2714 build_int_cst (itype, 1)),
2715 build_int_cst (itype, 0));
2716 append_pattern_def_seq (stmt_vinfo, def_stmt);
2717 var = vect_recog_temp_ssa_var (itype, NULL);
2718 def_stmt
2719 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2720 gimple_assign_lhs (def_stmt));
2721 append_pattern_def_seq (stmt_vinfo, def_stmt);
2722
2723 shift = build_int_cst (itype, tree_log2 (oprnd1));
2724 pattern_stmt
2725 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2726 RSHIFT_EXPR, var, shift);
2727 }
2728 else
2729 {
2730 tree signmask;
2731 if (compare_tree_int (oprnd1, 2) == 0)
2732 {
2733 signmask = vect_recog_temp_ssa_var (itype, NULL);
2734 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2735 build_int_cst (itype, 1),
2736 build_int_cst (itype, 0));
2737 append_pattern_def_seq (stmt_vinfo, def_stmt);
2738 }
2739 else
2740 {
2741 tree utype
2742 = build_nonstandard_integer_type (prec, 1);
2743 tree vecutype = get_vectype_for_scalar_type (utype);
2744 tree shift
2745 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2746 - tree_log2 (oprnd1));
2747 tree var = vect_recog_temp_ssa_var (utype, NULL);
2748
2749 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2750 build_int_cst (utype, -1),
2751 build_int_cst (utype, 0));
2752 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
2753 var = vect_recog_temp_ssa_var (utype, NULL);
2754 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2755 gimple_assign_lhs (def_stmt),
2756 shift);
2757 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
2758 signmask = vect_recog_temp_ssa_var (itype, NULL);
2759 def_stmt
2760 = gimple_build_assign (signmask, NOP_EXPR, var);
2761 append_pattern_def_seq (stmt_vinfo, def_stmt);
2762 }
2763 def_stmt
2764 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2765 PLUS_EXPR, oprnd0, signmask);
2766 append_pattern_def_seq (stmt_vinfo, def_stmt);
2767 def_stmt
2768 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2769 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2770 fold_build2 (MINUS_EXPR, itype, oprnd1,
2771 build_int_cst (itype, 1)));
2772 append_pattern_def_seq (stmt_vinfo, def_stmt);
2773
2774 pattern_stmt
2775 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2776 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2777 signmask);
2778 }
2779
2780 *type_out = vectype;
2781 return pattern_stmt;
2782 }
2783
2784 if (prec > HOST_BITS_PER_WIDE_INT
2785 || integer_zerop (oprnd1))
2786 return NULL;
2787
2788 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2789 return NULL;
2790
2791 if (TYPE_UNSIGNED (itype))
2792 {
2793 unsigned HOST_WIDE_INT mh, ml;
2794 int pre_shift, post_shift;
2795 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2796 & GET_MODE_MASK (itype_mode));
2797 tree t1, t2, t3, t4;
2798
2799 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2800 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2801 return NULL;
2802
2803 /* Find a suitable multiplier and right shift count
2804 instead of multiplying with D. */
2805 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2806
2807 /* If the suggested multiplier is more than PREC bits, we can do better
2808 for even divisors, using an initial right shift. */
2809 if (mh != 0 && (d & 1) == 0)
2810 {
2811 pre_shift = ctz_or_zero (d);
2812 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2813 &ml, &post_shift, &dummy_int);
2814 gcc_assert (!mh);
2815 }
2816 else
2817 pre_shift = 0;
2818
2819 if (mh != 0)
2820 {
2821 if (post_shift - 1 >= prec)
2822 return NULL;
2823
2824 /* t1 = oprnd0 h* ml;
2825 t2 = oprnd0 - t1;
2826 t3 = t2 >> 1;
2827 t4 = t1 + t3;
2828 q = t4 >> (post_shift - 1); */
2829 t1 = vect_recog_temp_ssa_var (itype, NULL);
2830 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2831 build_int_cst (itype, ml));
2832 append_pattern_def_seq (stmt_vinfo, def_stmt);
2833
2834 t2 = vect_recog_temp_ssa_var (itype, NULL);
2835 def_stmt
2836 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2837 append_pattern_def_seq (stmt_vinfo, def_stmt);
2838
2839 t3 = vect_recog_temp_ssa_var (itype, NULL);
2840 def_stmt
2841 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2842 append_pattern_def_seq (stmt_vinfo, def_stmt);
2843
2844 t4 = vect_recog_temp_ssa_var (itype, NULL);
2845 def_stmt
2846 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2847
2848 if (post_shift != 1)
2849 {
2850 append_pattern_def_seq (stmt_vinfo, def_stmt);
2851
2852 q = vect_recog_temp_ssa_var (itype, NULL);
2853 pattern_stmt
2854 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2855 build_int_cst (itype, post_shift - 1));
2856 }
2857 else
2858 {
2859 q = t4;
2860 pattern_stmt = def_stmt;
2861 }
2862 }
2863 else
2864 {
2865 if (pre_shift >= prec || post_shift >= prec)
2866 return NULL;
2867
2868 /* t1 = oprnd0 >> pre_shift;
2869 t2 = t1 h* ml;
2870 q = t2 >> post_shift; */
2871 if (pre_shift)
2872 {
2873 t1 = vect_recog_temp_ssa_var (itype, NULL);
2874 def_stmt
2875 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2876 build_int_cst (NULL, pre_shift));
2877 append_pattern_def_seq (stmt_vinfo, def_stmt);
2878 }
2879 else
2880 t1 = oprnd0;
2881
2882 t2 = vect_recog_temp_ssa_var (itype, NULL);
2883 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2884 build_int_cst (itype, ml));
2885
2886 if (post_shift)
2887 {
2888 append_pattern_def_seq (stmt_vinfo, def_stmt);
2889
2890 q = vect_recog_temp_ssa_var (itype, NULL);
2891 def_stmt
2892 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2893 build_int_cst (itype, post_shift));
2894 }
2895 else
2896 q = t2;
2897
2898 pattern_stmt = def_stmt;
2899 }
2900 }
2901 else
2902 {
2903 unsigned HOST_WIDE_INT ml;
2904 int post_shift;
2905 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2906 unsigned HOST_WIDE_INT abs_d;
2907 bool add = false;
2908 tree t1, t2, t3, t4;
2909
2910 /* Give up for -1. */
2911 if (d == -1)
2912 return NULL;
2913
2914 /* Since d might be INT_MIN, we have to cast to
2915 unsigned HOST_WIDE_INT before negating to avoid
2916 undefined signed overflow. */
2917 abs_d = (d >= 0
2918 ? (unsigned HOST_WIDE_INT) d
2919 : - (unsigned HOST_WIDE_INT) d);
2920
2921 /* n rem d = n rem -d */
2922 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2923 {
2924 d = abs_d;
2925 oprnd1 = build_int_cst (itype, abs_d);
2926 }
2927 else if (HOST_BITS_PER_WIDE_INT >= prec
2928 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2929 /* This case is not handled correctly below. */
2930 return NULL;
2931
2932 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2933 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2934 {
2935 add = true;
2936 ml |= HOST_WIDE_INT_M1U << (prec - 1);
2937 }
2938 if (post_shift >= prec)
2939 return NULL;
2940
2941 /* t1 = oprnd0 h* ml; */
2942 t1 = vect_recog_temp_ssa_var (itype, NULL);
2943 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2944 build_int_cst (itype, ml));
2945
2946 if (add)
2947 {
2948 /* t2 = t1 + oprnd0; */
2949 append_pattern_def_seq (stmt_vinfo, def_stmt);
2950 t2 = vect_recog_temp_ssa_var (itype, NULL);
2951 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2952 }
2953 else
2954 t2 = t1;
2955
2956 if (post_shift)
2957 {
2958 /* t3 = t2 >> post_shift; */
2959 append_pattern_def_seq (stmt_vinfo, def_stmt);
2960 t3 = vect_recog_temp_ssa_var (itype, NULL);
2961 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2962 build_int_cst (itype, post_shift));
2963 }
2964 else
2965 t3 = t2;
2966
2967 wide_int oprnd0_min, oprnd0_max;
2968 int msb = 1;
2969 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2970 {
2971 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2972 msb = 0;
2973 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2974 msb = -1;
2975 }
2976
2977 if (msb == 0 && d >= 0)
2978 {
2979 /* q = t3; */
2980 q = t3;
2981 pattern_stmt = def_stmt;
2982 }
2983 else
2984 {
2985 /* t4 = oprnd0 >> (prec - 1);
2986 or if we know from VRP that oprnd0 >= 0
2987 t4 = 0;
2988 or if we know from VRP that oprnd0 < 0
2989 t4 = -1; */
2990 append_pattern_def_seq (stmt_vinfo, def_stmt);
2991 t4 = vect_recog_temp_ssa_var (itype, NULL);
2992 if (msb != 1)
2993 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2994 build_int_cst (itype, msb));
2995 else
2996 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2997 build_int_cst (itype, prec - 1));
2998 append_pattern_def_seq (stmt_vinfo, def_stmt);
2999
3000 /* q = t3 - t4; or q = t4 - t3; */
3001 q = vect_recog_temp_ssa_var (itype, NULL);
3002 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3003 d < 0 ? t3 : t4);
3004 }
3005 }
3006
3007 if (rhs_code == TRUNC_MOD_EXPR)
3008 {
3009 tree r, t1;
3010
3011 /* We divided. Now finish by:
3012 t1 = q * oprnd1;
3013 r = oprnd0 - t1; */
3014 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3015
3016 t1 = vect_recog_temp_ssa_var (itype, NULL);
3017 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3018 append_pattern_def_seq (stmt_vinfo, def_stmt);
3019
3020 r = vect_recog_temp_ssa_var (itype, NULL);
3021 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3022 }
3023
3024 /* Pattern detected. */
3025 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3026
3027 *type_out = vectype;
3028 return pattern_stmt;
3029 }
3030
3031 /* Function vect_recog_mixed_size_cond_pattern
3032
3033 Try to find the following pattern:
3034
3035 type x_t, y_t;
3036 TYPE a_T, b_T, c_T;
3037 loop:
3038 S1 a_T = x_t CMP y_t ? b_T : c_T;
3039
3040 where type 'TYPE' is an integral type which has a different size
3041 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3042 than 'type', the constants need to fit into an integer type
3043 with the same width as 'type') or results of conversion from 'type'.
3044
3045 Input:
3046
3047 * STMT_VINFO: The stmt from which the pattern search begins.
3048
3049 Output:
3050
3051 * TYPE_OUT: The type of the output of this pattern.
3052
3053 * Return value: A new stmt that will be used to replace the pattern.
3054 Additionally a def_stmt is added.
3055
3056 a_it = x_t CMP y_t ? b_it : c_it;
3057 a_T = (TYPE) a_it; */
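
  /* A small illustrative instance (types chosen arbitrarily):

       short x_t, y_t;
       int a_T;

       a_T = x_t < y_t ? 1 : 2;

     would be rewritten as:

       a_it = x_t < y_t ? 1 : 2;   // a_it has a 16-bit type here
       a_T = (int) a_it;

     so that the comparison and the selection happen at the same vector
     element width.  */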
3058
3059 static gimple *
3060 vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3061 {
3062 gimple *last_stmt = stmt_vinfo->stmt;
3063 tree cond_expr, then_clause, else_clause;
3064 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3065 gimple *pattern_stmt, *def_stmt;
3066 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3067 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3068 bool promotion;
3069 tree comp_scalar_type;
3070
3071 if (!is_gimple_assign (last_stmt)
3072 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3073 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3074 return NULL;
3075
3076 cond_expr = gimple_assign_rhs1 (last_stmt);
3077 then_clause = gimple_assign_rhs2 (last_stmt);
3078 else_clause = gimple_assign_rhs3 (last_stmt);
3079
3080 if (!COMPARISON_CLASS_P (cond_expr))
3081 return NULL;
3082
3083 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3084 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3085 if (comp_vectype == NULL_TREE)
3086 return NULL;
3087
3088 type = gimple_expr_type (last_stmt);
3089 if (types_compatible_p (type, comp_scalar_type)
3090 || ((TREE_CODE (then_clause) != INTEGER_CST
3091 || TREE_CODE (else_clause) != INTEGER_CST)
3092 && !INTEGRAL_TYPE_P (comp_scalar_type))
3093 || !INTEGRAL_TYPE_P (type))
3094 return NULL;
3095
3096 if ((TREE_CODE (then_clause) != INTEGER_CST
3097 && !type_conversion_p (then_clause, stmt_vinfo, false, &orig_type0,
3098 &def_stmt0, &promotion))
3099 || (TREE_CODE (else_clause) != INTEGER_CST
3100 && !type_conversion_p (else_clause, stmt_vinfo, false, &orig_type1,
3101 &def_stmt1, &promotion)))
3102 return NULL;
3103
3104 if (orig_type0 && orig_type1
3105 && !types_compatible_p (orig_type0, orig_type1))
3106 return NULL;
3107
3108 if (orig_type0)
3109 {
3110 if (!types_compatible_p (orig_type0, comp_scalar_type))
3111 return NULL;
3112 then_clause = gimple_assign_rhs1 (def_stmt0);
3113 itype = orig_type0;
3114 }
3115
3116 if (orig_type1)
3117 {
3118 if (!types_compatible_p (orig_type1, comp_scalar_type))
3119 return NULL;
3120 else_clause = gimple_assign_rhs1 (def_stmt1);
3121 itype = orig_type1;
3122 }
3123
3124
3125 HOST_WIDE_INT cmp_mode_size
3126 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3127
3128 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3129 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3130 return NULL;
3131
3132 vectype = get_vectype_for_scalar_type (type);
3133 if (vectype == NULL_TREE)
3134 return NULL;
3135
3136 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3137 return NULL;
3138
3139 if (itype == NULL_TREE)
3140 itype = build_nonstandard_integer_type (cmp_mode_size,
3141 TYPE_UNSIGNED (type));
3142
3143 if (itype == NULL_TREE
3144 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3145 return NULL;
3146
3147 vecitype = get_vectype_for_scalar_type (itype);
3148 if (vecitype == NULL_TREE)
3149 return NULL;
3150
3151 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3152 return NULL;
3153
3154 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3155 {
3156 if ((TREE_CODE (then_clause) == INTEGER_CST
3157 && !int_fits_type_p (then_clause, itype))
3158 || (TREE_CODE (else_clause) == INTEGER_CST
3159 && !int_fits_type_p (else_clause, itype)))
3160 return NULL;
3161 }
3162
3163 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3164 COND_EXPR, unshare_expr (cond_expr),
3165 fold_convert (itype, then_clause),
3166 fold_convert (itype, else_clause));
3167 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3168 NOP_EXPR, gimple_assign_lhs (def_stmt));
3169
3170 append_pattern_def_seq (stmt_vinfo, def_stmt, vecitype);
3171 *type_out = vectype;
3172
3173 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3174
3175 return pattern_stmt;
3176 }
3177
3178
3179 /* Helper function of vect_recog_bool_pattern. Called recursively, it returns
3180 true if bool VAR can and should be optimized that way. Assume it shouldn't be
3181 if it is the result of a comparison that can be directly vectorized into
3182 a vector comparison. Fills in STMTS with all stmts visited during the
3183 walk. */
3184
3185 static bool
3186 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3187 {
3188 tree rhs1;
3189 enum tree_code rhs_code;
3190
3191 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3192 if (!def_stmt_info)
3193 return false;
3194
3195 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3196 if (!def_stmt)
3197 return false;
3198
3199 if (stmts.contains (def_stmt))
3200 return true;
3201
3202 rhs1 = gimple_assign_rhs1 (def_stmt);
3203 rhs_code = gimple_assign_rhs_code (def_stmt);
3204 switch (rhs_code)
3205 {
3206 case SSA_NAME:
3207 if (! check_bool_pattern (rhs1, vinfo, stmts))
3208 return false;
3209 break;
3210
3211 CASE_CONVERT:
3212 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3213 return false;
3214 if (! check_bool_pattern (rhs1, vinfo, stmts))
3215 return false;
3216 break;
3217
3218 case BIT_NOT_EXPR:
3219 if (! check_bool_pattern (rhs1, vinfo, stmts))
3220 return false;
3221 break;
3222
3223 case BIT_AND_EXPR:
3224 case BIT_IOR_EXPR:
3225 case BIT_XOR_EXPR:
3226 if (! check_bool_pattern (rhs1, vinfo, stmts)
3227 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3228 return false;
3229 break;
3230
3231 default:
3232 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3233 {
3234 tree vecitype, comp_vectype;
3235
3236 /* If the comparison can throw, then is_gimple_condexpr will be
3237 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3238 if (stmt_could_throw_p (cfun, def_stmt))
3239 return false;
3240
3241 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3242 if (comp_vectype == NULL_TREE)
3243 return false;
3244
3245 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3246 if (mask_type
3247 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3248 return false;
3249
3250 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3251 {
3252 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3253 tree itype
3254 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3255 vecitype = get_vectype_for_scalar_type (itype);
3256 if (vecitype == NULL_TREE)
3257 return false;
3258 }
3259 else
3260 vecitype = comp_vectype;
3261 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3262 return false;
3263 }
3264 else
3265 return false;
3266 break;
3267 }
3268
3269 bool res = stmts.add (def_stmt);
3270 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3271 gcc_assert (!res);
3272
3273 return true;
3274 }
3275
3276
3277 /* Helper function of adjust_bool_pattern. Cast VAR (the result of a
3278 previous stmt, SSA_NAME_DEF_STMT of VAR) to TYPE, appending the new
3279 cast stmt to STMT_INFO's pattern definition sequence. */
3280
3281 static tree
3282 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3283 {
3284 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3285 NOP_EXPR, var);
3286 append_pattern_def_seq (stmt_info, cast_stmt,
3287 get_vectype_for_scalar_type (type));
3288 return gimple_assign_lhs (cast_stmt);
3289 }
3290
3291 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3292 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3293 type, OUT_TYPE is the desired final integer type of the whole pattern.
3294 STMT_INFO is the info of the pattern root and is where pattern stmts should
3295 be associated with. DEFS is a map of pattern defs. */
3296
3297 static void
3298 adjust_bool_pattern (tree var, tree out_type,
3299 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3300 {
3301 gimple *stmt = SSA_NAME_DEF_STMT (var);
3302 enum tree_code rhs_code, def_rhs_code;
3303 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3304 location_t loc;
3305 gimple *pattern_stmt, *def_stmt;
3306 tree trueval = NULL_TREE;
3307
3308 rhs1 = gimple_assign_rhs1 (stmt);
3309 rhs2 = gimple_assign_rhs2 (stmt);
3310 rhs_code = gimple_assign_rhs_code (stmt);
3311 loc = gimple_location (stmt);
3312 switch (rhs_code)
3313 {
3314 case SSA_NAME:
3315 CASE_CONVERT:
3316 irhs1 = *defs.get (rhs1);
3317 itype = TREE_TYPE (irhs1);
3318 pattern_stmt
3319 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3320 SSA_NAME, irhs1);
3321 break;
3322
3323 case BIT_NOT_EXPR:
3324 irhs1 = *defs.get (rhs1);
3325 itype = TREE_TYPE (irhs1);
3326 pattern_stmt
3327 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3328 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3329 break;
3330
3331 case BIT_AND_EXPR:
3332 /* Try to optimize x = y & (a < b ? 1 : 0); into
3333 x = (a < b ? y : 0);
3334
3335 E.g. for:
3336 bool a_b, b_b, c_b;
3337 TYPE d_T;
3338
3339 S1 a_b = x1 CMP1 y1;
3340 S2 b_b = x2 CMP2 y2;
3341 S3 c_b = a_b & b_b;
3342 S4 d_T = (TYPE) c_b;
3343
3344 we would normally emit:
3345
3346 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3347 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3348 S3' c_T = a_T & b_T;
3349 S4' d_T = c_T;
3350
3351 but we can save one stmt by using the
3352 result of one of the COND_EXPRs in the other COND_EXPR and leave
3353 BIT_AND_EXPR stmt out:
3354
3355 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3356 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3357 S4' f_T = c_T;
3358
3359 At least when VEC_COND_EXPR is implemented using masks,
3360 cond ? 1 : 0 is as expensive as cond ? var : 0: in both cases the
3361 comparison mask is computed and then ANDed, in one case with an
3362 all-ones vector and in the other with a vector register.
3363 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3364 often more expensive. */
3365 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3366 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3367 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3368 {
3369 irhs1 = *defs.get (rhs1);
3370 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3371 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3372 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3373 {
3374 rhs_code = def_rhs_code;
3375 rhs1 = def_rhs1;
3376 rhs2 = gimple_assign_rhs2 (def_stmt);
3377 trueval = irhs1;
3378 goto do_compare;
3379 }
3380 else
3381 irhs2 = *defs.get (rhs2);
3382 goto and_ior_xor;
3383 }
3384 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3385 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3386 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3387 {
3388 irhs2 = *defs.get (rhs2);
3389 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3390 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3391 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3392 {
3393 rhs_code = def_rhs_code;
3394 rhs1 = def_rhs1;
3395 rhs2 = gimple_assign_rhs2 (def_stmt);
3396 trueval = irhs2;
3397 goto do_compare;
3398 }
3399 else
3400 irhs1 = *defs.get (rhs1);
3401 goto and_ior_xor;
3402 }
3403 /* FALLTHRU */
3404 case BIT_IOR_EXPR:
3405 case BIT_XOR_EXPR:
3406 irhs1 = *defs.get (rhs1);
3407 irhs2 = *defs.get (rhs2);
3408 and_ior_xor:
3409 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3410 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3411 {
3412 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3413 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3414 int out_prec = TYPE_PRECISION (out_type);
3415 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3416 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3417 stmt_info);
3418 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3419 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3420 stmt_info);
3421 else
3422 {
3423 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3424 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3425 }
3426 }
3427 itype = TREE_TYPE (irhs1);
3428 pattern_stmt
3429 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3430 rhs_code, irhs1, irhs2);
3431 break;
3432
3433 default:
3434 do_compare:
3435 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3436 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3437 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3438 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3439 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3440 {
3441 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3442 itype
3443 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3444 }
3445 else
3446 itype = TREE_TYPE (rhs1);
3447 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3448 if (trueval == NULL_TREE)
3449 trueval = build_int_cst (itype, 1);
3450 else
3451 gcc_checking_assert (useless_type_conversion_p (itype,
3452 TREE_TYPE (trueval)));
3453 pattern_stmt
3454 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3455 COND_EXPR, cond_expr, trueval,
3456 build_int_cst (itype, 0));
3457 break;
3458 }
3459
3460 gimple_set_location (pattern_stmt, loc);
3461 append_pattern_def_seq (stmt_info, pattern_stmt,
3462 get_vectype_for_scalar_type (itype));
3463 defs.put (var, gimple_assign_lhs (pattern_stmt));
3464 }
3465
3466 /* Comparison function for qsorting a vector of gimple stmts by UID. */
3467
3468 static int
3469 sort_after_uid (const void *p1, const void *p2)
3470 {
3471 const gimple *stmt1 = *(const gimple * const *)p1;
3472 const gimple *stmt2 = *(const gimple * const *)p2;
3473 return gimple_uid (stmt1) - gimple_uid (stmt2);
3474 }
3475
3476 /* Create pattern stmts for all stmts participating in the bool pattern
3477 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
3478 OUT_TYPE. Return the def of the pattern root. */
3479
3480 static tree
3481 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3482 tree out_type, stmt_vec_info stmt_info)
3483 {
3484 /* Gather original stmts in the bool pattern in their order of appearance
3485 in the IL. */
3486 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3487 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3488 i != bool_stmt_set.end (); ++i)
3489 bool_stmts.quick_push (*i);
3490 bool_stmts.qsort (sort_after_uid);
3491
3492 /* Now process them in that order, producing pattern stmts. */
3493 hash_map <tree, tree> defs;
3494 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3495 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3496 out_type, stmt_info, defs);
3497
3498 /* Pop the last pattern seq stmt and install it as pattern root for STMT_INFO. */
3499 gimple *pattern_stmt
3500 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
3501 return gimple_assign_lhs (pattern_stmt);
3502 }
3503
3504 /* Helper for search_type_for_mask. */
3505
3506 static tree
3507 search_type_for_mask_1 (tree var, vec_info *vinfo,
3508 hash_map<gimple *, tree> &cache)
3509 {
3510 tree rhs1;
3511 enum tree_code rhs_code;
3512 tree res = NULL_TREE, res2;
3513
3514 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3515 return NULL_TREE;
3516
3517 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3518 if (!def_stmt_info)
3519 return NULL_TREE;
3520
3521 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3522 if (!def_stmt)
3523 return NULL_TREE;
3524
3525 tree *c = cache.get (def_stmt);
3526 if (c)
3527 return *c;
3528
3529 rhs_code = gimple_assign_rhs_code (def_stmt);
3530 rhs1 = gimple_assign_rhs1 (def_stmt);
3531
3532 switch (rhs_code)
3533 {
3534 case SSA_NAME:
3535 case BIT_NOT_EXPR:
3536 CASE_CONVERT:
3537 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3538 break;
3539
3540 case BIT_AND_EXPR:
3541 case BIT_IOR_EXPR:
3542 case BIT_XOR_EXPR:
3543 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3544 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3545 cache);
3546 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3547 res = res2;
3548 break;
3549
3550 default:
3551 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3552 {
3553 tree comp_vectype, mask_type;
3554
3555 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3556 {
3557 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3558 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3559 vinfo, cache);
3560 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3561 res = res2;
3562 break;
3563 }
3564
3565 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3566 if (comp_vectype == NULL_TREE)
3567 {
3568 res = NULL_TREE;
3569 break;
3570 }
3571
3572 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3573 if (!mask_type
3574 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3575 {
3576 res = NULL_TREE;
3577 break;
3578 }
3579
3580 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3581 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3582 {
3583 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3584 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3585 }
3586 else
3587 res = TREE_TYPE (rhs1);
3588 }
3589 }
3590
3591 cache.put (def_stmt, res);
3592 return res;
3593 }
3594
3595 /* Return the proper type for converting bool VAR into
3596 an integer value or NULL_TREE if no such type exists.
3597 The type is chosen so that the converted value has the
3598 same number of elements as VAR's vector type. */
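/* As an illustrative sketch (the variable names below are hypothetical):
   for a mask defined as

     _Bool b_1 = x_2 < y_3;

   where x_2 and y_3 are 32-bit ints, and assuming the target can expand
   the corresponding vector comparison, the chosen type is a 32-bit
   unsigned integer type, so the converted value can live in vectors with
   int-sized elements rather than byte-sized boolean elements.  */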
3599
3600 static tree
3601 search_type_for_mask (tree var, vec_info *vinfo)
3602 {
3603 hash_map<gimple *, tree> cache;
3604 return search_type_for_mask_1 (var, vinfo, cache);
3605 }
3606
3607 /* Function vect_recog_bool_pattern
3608
3609 Try to find a pattern like the following:
3610
3611 bool a_b, b_b, c_b, d_b, e_b;
3612 TYPE f_T;
3613 loop:
3614 S1 a_b = x1 CMP1 y1;
3615 S2 b_b = x2 CMP2 y2;
3616 S3 c_b = a_b & b_b;
3617 S4 d_b = x3 CMP3 y3;
3618 S5 e_b = c_b | d_b;
3619 S6 f_T = (TYPE) e_b;
3620
3621 where type 'TYPE' is an integral type. Or a similar pattern
3622 ending in
3623
3624 S6 f_Y = e_b ? r_Y : s_Y;
3625
3626 as results from if-conversion of a complex condition.
3627
3628 Input:
3629
3630 * STMT_VINFO: The stmt at the end from which the pattern
3631 search begins, i.e. cast of a bool to
3632 an integer type.
3633
3634 Output:
3635
3636 * TYPE_OUT: The type of the output of this pattern.
3637
3638 * Return value: A new stmt that will be used to replace the pattern.
3639
3640 Assuming the size of TYPE is the same as the size of all comparisons
3641 (otherwise some casts would be added where needed), for the above
3642 sequence we create the related pattern stmts:
3643 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3644 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3645 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3646 S5' e_T = c_T | d_T;
3647 S6' f_T = e_T;
3648
3649 Instead of the above S3' we could emit:
3650 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3651 S3' c_T = a_T & b_T;
3652 but the above is more efficient. */
3653
3654 static gimple *
3655 vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3656 {
3657 gimple *last_stmt = stmt_vinfo->stmt;
3658 enum tree_code rhs_code;
3659 tree var, lhs, rhs, vectype;
3660 vec_info *vinfo = stmt_vinfo->vinfo;
3661 gimple *pattern_stmt;
3662
3663 if (!is_gimple_assign (last_stmt))
3664 return NULL;
3665
3666 var = gimple_assign_rhs1 (last_stmt);
3667 lhs = gimple_assign_lhs (last_stmt);
3668
3669 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3670 return NULL;
3671
3672 hash_set<gimple *> bool_stmts;
3673
3674 rhs_code = gimple_assign_rhs_code (last_stmt);
3675 if (CONVERT_EXPR_CODE_P (rhs_code))
3676 {
3677 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3678 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3679 return NULL;
3680 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3681 if (vectype == NULL_TREE)
3682 return NULL;
3683
3684 if (check_bool_pattern (var, vinfo, bool_stmts))
3685 {
3686 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), stmt_vinfo);
3687 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3688 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3689 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3690 else
3691 pattern_stmt
3692 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3693 }
3694 else
3695 {
3696 tree type = search_type_for_mask (var, vinfo);
3697 tree cst0, cst1, tmp;
3698
3699 if (!type)
3700 return NULL;
3701
3702 /* We may directly use a cond with the narrowed type to avoid
3703 multiple cond exprs followed by result packing, and instead
3704 perform a single cond with a packed mask. In case of
3705 widening it is better to do the cond first and then extract
3706 the results. */
3707 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3708 type = TREE_TYPE (lhs);
3709
3710 cst0 = build_int_cst (type, 0);
3711 cst1 = build_int_cst (type, 1);
3712 tmp = vect_recog_temp_ssa_var (type, NULL);
3713 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3714
3715 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3716 {
3717 tree new_vectype = get_vectype_for_scalar_type (type);
3718 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
3719
3720 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3721 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3722 }
3723 }
3724
3725 *type_out = vectype;
3726 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3727
3728 return pattern_stmt;
3729 }
3730 else if (rhs_code == COND_EXPR
3731 && TREE_CODE (var) == SSA_NAME)
3732 {
3733 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3734 if (vectype == NULL_TREE)
3735 return NULL;
3736
3737 /* Build a scalar type for the boolean result that when
3738 vectorized matches the vector type of the result in
3739 size and number of elements. */
3740 unsigned prec
3741 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3742 TYPE_VECTOR_SUBPARTS (vectype));
3743
3744 tree type
3745 = build_nonstandard_integer_type (prec,
3746 TYPE_UNSIGNED (TREE_TYPE (var)));
3747 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3748 return NULL;
3749
3750 if (!check_bool_pattern (var, vinfo, bool_stmts))
3751 return NULL;
3752
3753 rhs = adjust_bool_stmts (bool_stmts, type, stmt_vinfo);
3754
3755 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3756 pattern_stmt
3757 = gimple_build_assign (lhs, COND_EXPR,
3758 build2 (NE_EXPR, boolean_type_node,
3759 rhs, build_int_cst (type, 0)),
3760 gimple_assign_rhs2 (last_stmt),
3761 gimple_assign_rhs3 (last_stmt));
3762 *type_out = vectype;
3763 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3764
3765 return pattern_stmt;
3766 }
3767 else if (rhs_code == SSA_NAME
3768 && STMT_VINFO_DATA_REF (stmt_vinfo))
3769 {
3770 stmt_vec_info pattern_stmt_info;
3771 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3772 gcc_assert (vectype != NULL_TREE);
3773 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3774 return NULL;
3775
3776 if (check_bool_pattern (var, vinfo, bool_stmts))
3777 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), stmt_vinfo);
3778 else
3779 {
3780 tree type = search_type_for_mask (var, vinfo);
3781 tree cst0, cst1, new_vectype;
3782
3783 if (!type)
3784 return NULL;
3785
3786 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3787 type = TREE_TYPE (vectype);
3788
3789 cst0 = build_int_cst (type, 0);
3790 cst1 = build_int_cst (type, 1);
3791 new_vectype = get_vectype_for_scalar_type (type);
3792
3793 rhs = vect_recog_temp_ssa_var (type, NULL);
3794 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3795 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
3796 }
3797
3798 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3799 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3800 {
3801 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3802 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3803 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3804 rhs = rhs2;
3805 }
3806 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3807 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
3808 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
3809 *type_out = vectype;
3810 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3811
3812 return pattern_stmt;
3813 }
3814 else
3815 return NULL;
3816 }
3817
3818
3819 /* A helper for vect_recog_mask_conversion_pattern. Build
3820 conversion of MASK to a type suitable for masking VECTYPE.
3821 The built statement gets the required vectype and is appended to
3822 the pattern sequence of STMT_VINFO.
3823
3824 Return converted mask. */
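/* For example (an illustrative sketch with hypothetical types): a mask
   computed by comparing 32-bit ints cannot directly mask an operation on
   a vector of 64-bit doubles.  With VECTYPE being that double vector
   type, the helper emits

     mask' = (MASKTYPE) mask;

   where MASKTYPE is the same-sized truth vector type of VECTYPE, appends
   the statement to STMT_VINFO's pattern def sequence and returns mask'.  */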
3825
3826 static tree
3827 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo)
3828 {
3829 gimple *stmt;
3830 tree masktype, tmp;
3831
3832 masktype = build_same_sized_truth_vector_type (vectype);
3833 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3834 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3835 append_pattern_def_seq (stmt_vinfo, stmt, masktype);
3836
3837 return tmp;
3838 }
3839
3840
3841 /* Function vect_recog_mask_conversion_pattern
3842
3843 Try to find statements which require boolean type
3844 conversion. Additional conversion statements are
3845 added to handle such cases. For example:
3846
3847 bool m_1, m_2, m_3;
3848 int i_4, i_5;
3849 double d_6, d_7;
3850 char c_1, c_2, c_3;
3851
3852 S1 m_1 = i_4 > i_5;
3853 S2 m_2 = d_6 < d_7;
3854 S3 m_3 = m_1 & m_2;
3855 S4 c_1 = m_3 ? c_2 : c_3;
3856
3857 Will be transformed into:
3858
3859 S1 m_1 = i_4 > i_5;
3860 S2 m_2 = d_6 < d_7;
3861 S3'' m_2' = (_Bool[bitsize=32])m_2
3862 S3' m_3' = m_1 & m_2';
3863 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3864 S4' c_1' = m_3'' ? c_2 : c_3; */
3865
3866 static gimple *
3867 vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3868 {
3869 gimple *last_stmt = stmt_vinfo->stmt;
3870 enum tree_code rhs_code;
3871 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3872 tree vectype1, vectype2;
3873 stmt_vec_info pattern_stmt_info;
3874 vec_info *vinfo = stmt_vinfo->vinfo;
3875
3876 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3877 if (is_gimple_call (last_stmt)
3878 && gimple_call_internal_p (last_stmt))
3879 {
3880 gcall *pattern_stmt;
3881
3882 internal_fn ifn = gimple_call_internal_fn (last_stmt);
3883 int mask_argno = internal_fn_mask_index (ifn);
3884 if (mask_argno < 0)
3885 return NULL;
3886
3887 bool store_p = internal_store_fn_p (ifn);
3888 if (store_p)
3889 {
3890 int rhs_index = internal_fn_stored_value_index (ifn);
3891 tree rhs = gimple_call_arg (last_stmt, rhs_index);
3892 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs));
3893 }
3894 else
3895 {
3896 lhs = gimple_call_lhs (last_stmt);
3897 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3898 }
3899
3900 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
3901 tree mask_arg_type = search_type_for_mask (mask_arg, vinfo);
3902 if (!mask_arg_type)
3903 return NULL;
3904 vectype2 = get_mask_type_for_scalar_type (mask_arg_type);
3905
3906 if (!vectype1 || !vectype2
3907 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3908 TYPE_VECTOR_SUBPARTS (vectype2)))
3909 return NULL;
3910
3911 tmp = build_mask_conversion (mask_arg, vectype1, stmt_vinfo);
3912
3913 auto_vec<tree, 8> args;
3914 unsigned int nargs = gimple_call_num_args (last_stmt);
3915 args.safe_grow (nargs);
3916 for (unsigned int i = 0; i < nargs; ++i)
3917 args[i] = ((int) i == mask_argno
3918 ? tmp
3919 : gimple_call_arg (last_stmt, i));
3920 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
3921
3922 if (!store_p)
3923 {
3924 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3925 gimple_call_set_lhs (pattern_stmt, lhs);
3926 }
3927 gimple_call_set_nothrow (pattern_stmt, true);
3928
3929 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
3930 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3931 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
3932
3933 *type_out = vectype1;
3934 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
3935
3936 return pattern_stmt;
3937 }
3938
3939 if (!is_gimple_assign (last_stmt))
3940 return NULL;
3941
3942 gimple *pattern_stmt;
3943 lhs = gimple_assign_lhs (last_stmt);
3944 rhs1 = gimple_assign_rhs1 (last_stmt);
3945 rhs_code = gimple_assign_rhs_code (last_stmt);
3946
3947 /* Check for cond expression requiring mask conversion. */
3948 if (rhs_code == COND_EXPR)
3949 {
3950 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3951
3952 if (TREE_CODE (rhs1) == SSA_NAME)
3953 {
3954 rhs1_type = search_type_for_mask (rhs1, vinfo);
3955 if (!rhs1_type)
3956 return NULL;
3957 }
3958 else if (COMPARISON_CLASS_P (rhs1))
3959 {
3960 /* Check whether we're comparing scalar booleans and (if so)
3961 whether a better mask type exists than the mask associated
3962 with boolean-sized elements. This avoids unnecessary packs
3963 and unpacks if the booleans are set from comparisons of
3964 wider types. E.g. in:
3965
3966 int x1, x2, x3, x4, y1, y2;
3967 ...
3968 bool b1 = (x1 == x2);
3969 bool b2 = (x3 == x4);
3970 ... = b1 == b2 ? y1 : y2;
3971
3972 it is better for b1 and b2 to use the mask type associated
3973 with int elements rather than bool (byte) elements. */
3974 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
3975 if (!rhs1_type)
3976 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
3977 }
3978 else
3979 return NULL;
3980
3981 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3982
3983 if (!vectype1 || !vectype2)
3984 return NULL;
3985
3986 /* Continue if a conversion is needed. Also continue if we have
3987 a comparison whose vector type would normally be different from
3988 VECTYPE2 when considered in isolation. In that case we'll
3989 replace the comparison with an SSA name (so that we can record
3990 its vector type) and behave as though the comparison was an SSA
3991 name from the outset. */
3992 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3993 TYPE_VECTOR_SUBPARTS (vectype2))
3994 && (TREE_CODE (rhs1) == SSA_NAME
3995 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
3996 return NULL;
3997
3998 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
3999 in place; vectorizable_condition can handle it there. This avoids
4000 unnecessary promotion stmts and an increased vectorization factor. */
4001 if (COMPARISON_CLASS_P (rhs1)
4002 && INTEGRAL_TYPE_P (rhs1_type)
4003 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4004 TYPE_VECTOR_SUBPARTS (vectype2)))
4005 {
4006 enum vect_def_type dt;
4007 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4008 && dt == vect_external_def
4009 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4010 && (dt == vect_external_def
4011 || dt == vect_constant_def))
4012 {
4013 tree wide_scalar_type = build_nonstandard_integer_type
4014 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4015 TYPE_UNSIGNED (rhs1_type));
4016 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4017 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4018 return NULL;
4019 }
4020 }
4021
4022 /* If rhs1 is a comparison we need to move it into a
4023 separate statement. */
4024 if (TREE_CODE (rhs1) != SSA_NAME)
4025 {
4026 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4027 pattern_stmt = gimple_build_assign (tmp, rhs1);
4028 rhs1 = tmp;
4029 append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2);
4030 }
4031
4032 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4033 TYPE_VECTOR_SUBPARTS (vectype2)))
4034 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4035 else
4036 tmp = rhs1;
4037
4038 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4039 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4040 gimple_assign_rhs2 (last_stmt),
4041 gimple_assign_rhs3 (last_stmt));
4042
4043 *type_out = vectype1;
4044 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4045
4046 return pattern_stmt;
4047 }
4048
4049 /* Now check for binary boolean operations requiring conversion for
4050 one of the operands. */
4051 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4052 return NULL;
4053
4054 if (rhs_code != BIT_IOR_EXPR
4055 && rhs_code != BIT_XOR_EXPR
4056 && rhs_code != BIT_AND_EXPR
4057 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4058 return NULL;
4059
4060 rhs2 = gimple_assign_rhs2 (last_stmt);
4061
4062 rhs1_type = search_type_for_mask (rhs1, vinfo);
4063 rhs2_type = search_type_for_mask (rhs2, vinfo);
4064
4065 if (!rhs1_type || !rhs2_type
4066 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4067 return NULL;
4068
4069 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4070 {
4071 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4072 if (!vectype1)
4073 return NULL;
4074 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo);
4075 }
4076 else
4077 {
4078 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4079 if (!vectype1)
4080 return NULL;
4081 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4082 }
4083
4084 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4085 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4086
4087 *type_out = vectype1;
4088 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4089
4090 return pattern_stmt;
4091 }
4092
4093 /* STMT_INFO is a load or store. If the load or store is conditional, return
4094 the boolean condition under which it occurs, otherwise return null. */
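/* For instance, an unconditional load or store appears as a plain
   assignment and yields NULL_TREE, whereas a load or store that was
   if-converted into an IFN_MASK_LOAD or IFN_MASK_STORE call yields the
   mask argument of that call.  */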
4095
4096 static tree
4097 vect_get_load_store_mask (stmt_vec_info stmt_info)
4098 {
4099 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4100 {
4101 gcc_assert (gimple_assign_single_p (def_assign));
4102 return NULL_TREE;
4103 }
4104
4105 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4106 {
4107 internal_fn ifn = gimple_call_internal_fn (def_call);
4108 int mask_index = internal_fn_mask_index (ifn);
4109 return gimple_call_arg (def_call, mask_index);
4110 }
4111
4112 gcc_unreachable ();
4113 }
4114
4115 /* Return the scalar offset type that an internal gather/scatter function
4116 should use. GS_INFO describes the gather/scatter operation. */
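/* For example (illustrative): if the accessed elements are 64-bit doubles
   but the offset is a 32-bit signed int, a 64-bit signed integer type is
   returned so that the offset gets sign-extended to the element width;
   a 32-bit unsigned offset would be zero-extended instead.  */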
4117
4118 static tree
4119 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4120 {
4121 tree offset_type = TREE_TYPE (gs_info->offset);
4122 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4123
4124 /* Enforced by vect_check_gather_scatter. */
4125 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4126 gcc_assert (element_bits >= offset_bits);
4127
4128 /* If the offset is narrower than the elements, extend it according
4129 to its sign. */
4130 if (element_bits > offset_bits)
4131 return build_nonstandard_integer_type (element_bits,
4132 TYPE_UNSIGNED (offset_type));
4133
4134 return offset_type;
4135 }
4136
4137 /* Return MASK if MASK is suitable for masking an operation on vectors
4138 of type VECTYPE, otherwise convert it into such a form and return
4139 the result. Associate any conversion statements with STMT_INFO's
4140 pattern. */
4141
4142 static tree
4143 vect_convert_mask_for_vectype (tree mask, tree vectype,
4144 stmt_vec_info stmt_info, vec_info *vinfo)
4145 {
4146 tree mask_type = search_type_for_mask (mask, vinfo);
4147 if (mask_type)
4148 {
4149 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4150 if (mask_vectype
4151 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4152 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4153 mask = build_mask_conversion (mask, vectype, stmt_info);
4154 }
4155 return mask;
4156 }
4157
4158 /* Return the equivalent of:
4159
4160 fold_convert (TYPE, VALUE)
4161
4162 with the expectation that the operation will be vectorized.
4163 If new statements are needed, add them as pattern statements
4164 to STMT_INFO. */
4165
4166 static tree
4167 vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
4168 {
4169 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4170 return value;
4171
4172 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4173 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4174 append_pattern_def_seq (stmt_info, conversion,
4175 get_vectype_for_scalar_type (type));
4176 return new_value;
4177 }
4178
4179 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4180 internal function. Return the final statement on success and set
4181 *TYPE_OUT to the vector type being loaded or stored.
4182
4183 This function only handles gathers and scatters that were recognized
4184 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
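/* A minimal sketch of the transformation (loop and variable names are
   hypothetical): assuming the target provides the required internal
   function, the load in

     for (i = 0; i < n; i++)
       if (pred[i])
         res[i] = base[ind[i]];

   is replaced by a pattern statement of the form

     res_1 = .MASK_GATHER_LOAD (base, offset, scale, mask);

   or, for an unconditional access, a .GATHER_LOAD call without the mask;
   scatter stores are handled analogously via IFN_SCATTER_STORE and
   IFN_MASK_SCATTER_STORE.  */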
4185
4186 static gimple *
4187 vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
4188 {
4189 /* Currently we only support this for loop vectorization. */
4190 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4191 if (!loop_vinfo)
4192 return NULL;
4193
4194 /* Make sure that we're looking at a gather load or scatter store. */
4195 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4196 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4197 return NULL;
4198
4199 /* Get the boolean that controls whether the load or store happens.
4200 This is null if the operation is unconditional. */
4201 tree mask = vect_get_load_store_mask (stmt_info);
4202
4203 /* Make sure that the target supports an appropriate internal
4204 function for the gather/scatter operation. */
4205 gather_scatter_info gs_info;
4206 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4207 || gs_info.decl)
4208 return NULL;
4209
4210 /* Convert the mask to the right form. */
4211 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4212 if (mask)
4213 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4214 loop_vinfo);
4215
4216 /* Get the invariant base and non-invariant offset, converting the
4217 latter to the same width as the vector elements. */
4218 tree base = gs_info.base;
4219 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4220 tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4221 stmt_info);
4222
4223 /* Build the new pattern statement. */
4224 tree scale = size_int (gs_info.scale);
4225 gcall *pattern_stmt;
4226 if (DR_IS_READ (dr))
4227 {
4228 if (mask != NULL)
4229 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4230 offset, scale, mask);
4231 else
4232 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4233 offset, scale);
4234 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4235 gimple_call_set_lhs (pattern_stmt, load_lhs);
4236 }
4237 else
4238 {
4239 tree rhs = vect_get_store_rhs (stmt_info);
4240 if (mask != NULL)
4241 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4242 base, offset, scale, rhs,
4243 mask);
4244 else
4245 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4246 base, offset, scale, rhs);
4247 }
4248 gimple_call_set_nothrow (pattern_stmt, true);
4249
4250 /* Copy across relevant vectorization info and associate DR with the
4251 new pattern statement instead of the original statement. */
4252 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4253 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4254
4255 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4256 *type_out = vectype;
4257 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4258
4259 return pattern_stmt;
4260 }
4261
4262 /* Return true if TYPE is a non-boolean integer type. These are the types
4263 that we want to consider for narrowing. */
4264
4265 static bool
4266 vect_narrowable_type_p (tree type)
4267 {
4268 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4269 }
4270
4271 /* Return true if the operation given by CODE can be truncated to N bits
4272 when only N bits of the output are needed. This is only true if bit N+1
4273 of the inputs has no effect on the low N bits of the result. */
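/* For instance, addition is truncatable because
   ((a + b) & 0xff) == (((a & 0xff) + (b & 0xff)) & 0xff) for any a and b,
   whereas right shifts and divisions are not: their low output bits
   depend on higher input bits.  */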
4274
4275 static bool
4276 vect_truncatable_operation_p (tree_code code)
4277 {
4278 switch (code)
4279 {
4280 case PLUS_EXPR:
4281 case MINUS_EXPR:
4282 case MULT_EXPR:
4283 case BIT_AND_EXPR:
4284 case BIT_IOR_EXPR:
4285 case BIT_XOR_EXPR:
4286 case COND_EXPR:
4287 return true;
4288
4289 default:
4290 return false;
4291 }
4292 }
4293
4294 /* Record that STMT_INFO could be changed from operating on TYPE to
4295 operating on a type with the precision and sign given by PRECISION
4296 and SIGN respectively. PRECISION is an arbitrary bit precision;
4297 it might not be a whole number of bytes. */
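/* For example (illustrative numbers): a requested precision of 12 bits is
   rounded up to 16 and is recorded only if that is still narrower than
   TYPE's precision and narrower than any previously recorded
   operation_precision.  */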
4298
4299 static void
4300 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4301 unsigned int precision, signop sign)
4302 {
4303 /* Round the precision up to a whole number of bytes. */
4304 precision = vect_element_precision (precision);
4305 if (precision < TYPE_PRECISION (type)
4306 && (!stmt_info->operation_precision
4307 || stmt_info->operation_precision > precision))
4308 {
4309 stmt_info->operation_precision = precision;
4310 stmt_info->operation_sign = sign;
4311 }
4312 }
4313
4314 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4315 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4316 is an arbitrary bit precision; it might not be a whole number of bytes. */
4317
4318 static void
4319 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4320 unsigned int min_input_precision)
4321 {
4322 /* This operation in isolation only requires the inputs to have
4323 MIN_INPUT_PRECISION bits of precision. However, that doesn't mean
4324 that MIN_INPUT_PRECISION is a natural precision for the chain
4325 as a whole. E.g. consider something like:
4326
4327 unsigned short *x, *y;
4328 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4329
4330 The right shift can be done on unsigned chars, and only requires the
4331 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4332 approach would mean turning a natural chain of single-vector unsigned
4333 short operations into one that truncates "*x" and then extends
4334 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4335 operation and one vector for each unsigned char operation.
4336 This would be a significant pessimization.
4337
4338 Instead only propagate the maximum of this precision and the precision
4339 required by the users of the result. This means that we don't pessimize
4340 the case above but continue to optimize things like:
4341
4342 unsigned char *y;
4343 unsigned short *x;
4344 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4345
4346 Here we would truncate two vectors of *x to a single vector of
4347 unsigned chars and use single-vector unsigned char operations for
4348 everything else, rather than doing two unsigned short copies of
4349 "(*x & 0xf0) >> 4" and then truncating the result. */
4350 min_input_precision = MAX (min_input_precision,
4351 stmt_info->min_output_precision);
4352
4353 if (min_input_precision < TYPE_PRECISION (type)
4354 && (!stmt_info->min_input_precision
4355 || stmt_info->min_input_precision > min_input_precision))
4356 stmt_info->min_input_precision = min_input_precision;
4357 }
4358
4359 /* Subroutine of vect_determine_min_output_precision. Return true if
4360 we can calculate a reduced number of output bits for STMT_INFO,
4361 whose result is LHS. */
4362
4363 static bool
4364 vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
4365 {
4366 /* Take the maximum precision required by users of the result. */
4367 vec_info *vinfo = stmt_info->vinfo;
4368 unsigned int precision = 0;
4369 imm_use_iterator iter;
4370 use_operand_p use;
4371 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4372 {
4373 gimple *use_stmt = USE_STMT (use);
4374 if (is_gimple_debug (use_stmt))
4375 continue;
4376 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4377 if (!use_stmt_info || !use_stmt_info->min_input_precision)
4378 return false;
4379 /* The input precision recorded for COND_EXPRs applies only to the
4380 "then" and "else" values. */
4381 gassign *assign = dyn_cast <gassign *> (use_stmt);
4382 if (assign
4383 && gimple_assign_rhs_code (assign) == COND_EXPR
4384 && use->use != gimple_assign_rhs2_ptr (assign)
4385 && use->use != gimple_assign_rhs3_ptr (assign))
4386 return false;
4387 precision = MAX (precision, use_stmt_info->min_input_precision);
4388 }
4389
4390 if (dump_enabled_p ())
4391 dump_printf_loc (MSG_NOTE, vect_location,
4392 "only the low %d bits of %T are significant\n",
4393 precision, lhs);
4394 stmt_info->min_output_precision = precision;
4395 return true;
4396 }
4397
4398 /* Calculate min_output_precision for STMT_INFO. */
4399
4400 static void
4401 vect_determine_min_output_precision (stmt_vec_info stmt_info)
4402 {
4403 /* We're only interested in statements with a narrowable result. */
4404 tree lhs = gimple_get_lhs (stmt_info->stmt);
4405 if (!lhs
4406 || TREE_CODE (lhs) != SSA_NAME
4407 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4408 return;
4409
4410 if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
4411 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4412 }
4413
4414 /* Use range information to decide whether STMT (described by STMT_INFO)
4415 could be done in a narrower type. This is effectively a forward
4416 propagation, since it uses context-independent information that applies
4417 to all users of an SSA name. */
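/* A minimal sketch (hypothetical statement and range): if range
   information shows that the result of

     _1 = x_2 + 1;

   always lies in [0, 256], then nine value bits are enough, so the
   addition can be recorded as an unsigned operation of precision 9,
   which vect_element_precision rounds up to a full 16-bit element.  */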
4418
4419 static void
4420 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4421 {
4422 tree lhs = gimple_assign_lhs (stmt);
4423 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4424 return;
4425
4426 tree type = TREE_TYPE (lhs);
4427 if (!vect_narrowable_type_p (type))
4428 return;
4429
4430 /* First see whether we have any useful range information for the result. */
4431 unsigned int precision = TYPE_PRECISION (type);
4432 signop sign = TYPE_SIGN (type);
4433 wide_int min_value, max_value;
4434 if (!vect_get_range_info (lhs, &min_value, &max_value))
4435 return;
4436
4437 tree_code code = gimple_assign_rhs_code (stmt);
4438 unsigned int nops = gimple_num_ops (stmt);
4439
4440 if (!vect_truncatable_operation_p (code))
4441 /* Check that all relevant input operands are compatible, and update
4442 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4443 for (unsigned int i = 1; i < nops; ++i)
4444 {
4445 tree op = gimple_op (stmt, i);
4446 if (TREE_CODE (op) == INTEGER_CST)
4447 {
4448 /* Don't require the integer to have TYPE (which it might
4449 not for things like shift amounts, etc.), but do require it
4450 to fit the type. */
4451 if (!int_fits_type_p (op, type))
4452 return;
4453
4454 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4455 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4456 }
4457 else if (TREE_CODE (op) == SSA_NAME)
4458 {
4459 /* Ignore codes that don't take uniform arguments. */
4460 if (!types_compatible_p (TREE_TYPE (op), type))
4461 return;
4462
4463 wide_int op_min_value, op_max_value;
4464 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4465 return;
4466
4467 min_value = wi::min (min_value, op_min_value, sign);
4468 max_value = wi::max (max_value, op_max_value, sign);
4469 }
4470 else
4471 return;
4472 }
4473
4474 /* Try to switch signed types for unsigned types if we can.
4475 This is better for two reasons. First, unsigned ops tend
4476 to be cheaper than signed ops. Second, it means that we can
4477 handle things like:
4478
4479 signed char c;
4480 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4481
4482 as:
4483
4484 signed char c;
4485 unsigned short res_1 = (unsigned short) c & 0xff00;
4486 int res = (int) res_1;
4487
4488 where the intermediate result res_1 has unsigned rather than
4489 signed type. */
4490 if (sign == SIGNED && !wi::neg_p (min_value))
4491 sign = UNSIGNED;
4492
4493 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4494 unsigned int precision1 = wi::min_precision (min_value, sign);
4495 unsigned int precision2 = wi::min_precision (max_value, sign);
4496 unsigned int value_precision = MAX (precision1, precision2);
4497 if (value_precision >= precision)
4498 return;
4499
4500 if (dump_enabled_p ())
4501 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4502 " without loss of precision: %G",
4503 sign == SIGNED ? "signed" : "unsigned",
4504 value_precision, stmt);
4505
4506 vect_set_operation_type (stmt_info, type, value_precision, sign);
4507 vect_set_min_input_precision (stmt_info, type, value_precision);
4508 }
4509
4510 /* Use information about the users of STMT's result to decide whether
4511 STMT (described by STMT_INFO) could be done in a narrower type.
4512 This is effectively a backward propagation. */
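/* A minimal sketch (hypothetical statement): if later statements only
   need the low 8 bits of

     _1 = x_2 >> 3;

   then the shift itself must be done with at least 8 + 3 = 11 bits, so
   the operation is recorded as needing 11 bits of precision (rounded up
   to a full element) and its input as needing at least 11 bits.  */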
4513
4514 static void
4515 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4516 {
4517 tree_code code = gimple_assign_rhs_code (stmt);
4518 unsigned int opno = (code == COND_EXPR ? 2 : 1);
4519 tree type = TREE_TYPE (gimple_op (stmt, opno));
4520 if (!vect_narrowable_type_p (type))
4521 return;
4522
4523 unsigned int precision = TYPE_PRECISION (type);
4524 unsigned int operation_precision, min_input_precision;
4525 switch (code)
4526 {
4527 CASE_CONVERT:
4528 /* Only the bits that contribute to the output matter. Don't change
4529 the precision of the operation itself. */
4530 operation_precision = precision;
4531 min_input_precision = stmt_info->min_output_precision;
4532 break;
4533
4534 case LSHIFT_EXPR:
4535 case RSHIFT_EXPR:
4536 {
4537 tree shift = gimple_assign_rhs2 (stmt);
4538 if (TREE_CODE (shift) != INTEGER_CST
4539 || !wi::ltu_p (wi::to_widest (shift), precision))
4540 return;
4541 unsigned int const_shift = TREE_INT_CST_LOW (shift);
4542 if (code == LSHIFT_EXPR)
4543 {
4544 /* We need CONST_SHIFT fewer bits of the input. */
4545 operation_precision = stmt_info->min_output_precision;
4546 min_input_precision = (MAX (operation_precision, const_shift)
4547 - const_shift);
4548 }
4549 else
4550 {
4551 /* We need CONST_SHIFT extra bits to do the operation. */
4552 operation_precision = (stmt_info->min_output_precision
4553 + const_shift);
4554 min_input_precision = operation_precision;
4555 }
4556 break;
4557 }
4558
4559 default:
4560 if (vect_truncatable_operation_p (code))
4561 {
4562 /* Input bit N has no effect on output bits N-1 and lower. */
4563 operation_precision = stmt_info->min_output_precision;
4564 min_input_precision = operation_precision;
4565 break;
4566 }
4567 return;
4568 }
4569
4570 if (operation_precision < precision)
4571 {
4572 if (dump_enabled_p ())
4573 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4574 " without affecting users: %G",
4575 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
4576 operation_precision, stmt);
4577 vect_set_operation_type (stmt_info, type, operation_precision,
4578 TYPE_SIGN (type));
4579 }
4580 vect_set_min_input_precision (stmt_info, type, min_input_precision);
4581 }
4582
4583 /* Handle vect_determine_precisions for STMT_INFO, given that we
4584 have already done so for the users of its result. */
4585
4586 void
4587 vect_determine_stmt_precisions (stmt_vec_info stmt_info)
4588 {
4589 vect_determine_min_output_precision (stmt_info);
4590 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
4591 {
4592 vect_determine_precisions_from_range (stmt_info, stmt);
4593 vect_determine_precisions_from_users (stmt_info, stmt);
4594 }
4595 }
4596
4597 /* Walk backwards through the vectorizable region to determine the
4598 values of these fields:
4599
4600 - min_output_precision
4601 - min_input_precision
4602 - operation_precision
4603 - operation_sign. */
4604
4605 void
4606 vect_determine_precisions (vec_info *vinfo)
4607 {
4608 DUMP_VECT_SCOPE ("vect_determine_precisions");
4609
4610 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4611 {
4612 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4613 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4614 unsigned int nbbs = loop->num_nodes;
4615
4616 for (unsigned int i = 0; i < nbbs; i++)
4617 {
4618 basic_block bb = bbs[nbbs - i - 1];
4619 for (gimple_stmt_iterator si = gsi_last_bb (bb);
4620 !gsi_end_p (si); gsi_prev (&si))
4621 vect_determine_stmt_precisions
4622 (vinfo->lookup_stmt (gsi_stmt (si)));
4623 }
4624 }
4625 else
4626 {
4627 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4628 gimple_stmt_iterator si = bb_vinfo->region_end;
4629 gimple *stmt;
4630 do
4631 {
4632 if (!gsi_stmt (si))
4633 si = gsi_last_bb (bb_vinfo->bb);
4634 else
4635 gsi_prev (&si);
4636 stmt = gsi_stmt (si);
4637 stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
4638 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
4639 vect_determine_stmt_precisions (stmt_info);
4640 }
4641 while (stmt != gsi_stmt (bb_vinfo->region_begin));
4642 }
4643 }
4644
4645 typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *);
4646
4647 struct vect_recog_func
4648 {
4649 vect_recog_func_ptr fn;
4650 const char *name;
4651 };
4652
4653 /* Note that ordering matters - the first pattern matching on a stmt is
4654 taken, which means the more complex patterns usually need to precede
4655 the less complex ones (widen_sum only after dot_prod or sad for example). */
4656 static vect_recog_func vect_vect_recog_func_ptrs[] = {
4657 { vect_recog_over_widening_pattern, "over_widening" },
4658 /* Must come after over_widening, which narrows the shift as much as
4659 possible beforehand. */
4660 { vect_recog_average_pattern, "average" },
4661 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
4662 { vect_recog_widen_mult_pattern, "widen_mult" },
4663 { vect_recog_dot_prod_pattern, "dot_prod" },
4664 { vect_recog_sad_pattern, "sad" },
4665 { vect_recog_widen_sum_pattern, "widen_sum" },
4666 { vect_recog_pow_pattern, "pow" },
4667 { vect_recog_widen_shift_pattern, "widen_shift" },
4668 { vect_recog_rotate_pattern, "rotate" },
4669 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
4670 { vect_recog_divmod_pattern, "divmod" },
4671 { vect_recog_mult_pattern, "mult" },
4672 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
4673 { vect_recog_bool_pattern, "bool" },
4674 /* This must come before mask conversion, and includes the parts
4675 of mask conversion that are needed for gather and scatter
4676 internal functions. */
4677 { vect_recog_gather_scatter_pattern, "gather_scatter" },
4678 { vect_recog_mask_conversion_pattern, "mask_conversion" }
4679 };
4680
4681 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
4682
4683 /* Mark statements that are involved in a pattern. */
4684
4685 static inline void
4686 vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
4687 tree pattern_vectype)
4688 {
4689 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4690
4691 gimple *orig_pattern_stmt = NULL;
4692 if (is_pattern_stmt_p (orig_stmt_info))
4693 {
4694 /* We're replacing a statement in an existing pattern definition
4695 sequence. */
4696 orig_pattern_stmt = orig_stmt_info->stmt;
4697 if (dump_enabled_p ())
4698 dump_printf_loc (MSG_NOTE, vect_location,
4699 "replacing earlier pattern %G", orig_pattern_stmt);
4700
4701 /* To keep the book-keeping simple, just swap the lhs of the
4702 old and new statements, so that the old one has a valid but
4703 unused lhs. */
4704 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
4705 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
4706 gimple_set_lhs (pattern_stmt, old_lhs);
4707
4708 if (dump_enabled_p ())
4709 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
4710
4711 /* Switch to the statement that ORIG replaces. */
4712 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
4713
4714 /* We shouldn't be replacing the main pattern statement. */
4715 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
4716 != orig_pattern_stmt);
4717 }
4718
4719 if (def_seq)
4720 for (gimple_stmt_iterator si = gsi_start (def_seq);
4721 !gsi_end_p (si); gsi_next (&si))
4722 vect_init_pattern_stmt (gsi_stmt (si), orig_stmt_info, pattern_vectype);
4723
4724 if (orig_pattern_stmt)
4725 {
4726 vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4727
4728 /* Insert all the new pattern statements before the original one. */
4729 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4730 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
4731 orig_def_seq);
4732 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
4733 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
4734
4735 /* Remove the pattern statement that this new pattern replaces. */
4736 gsi_remove (&gsi, false);
4737 }
4738 else
4739 vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4740 }
4741
4742 /* Function vect_pattern_recog_1
4743
4744 Input:
4745 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4746 computation pattern.
4747 STMT_INFO: A stmt from which the pattern search should start.
4748
4749 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
4750 a sequence of statements that has the same functionality and can be
4751 used to replace STMT_INFO. It returns the last statement in the sequence
4752 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
4753 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
4754 statement, having first checked that the target supports the new operation
4755 in that type.
4756
4757 This function also does some bookkeeping, as explained in the documentation
4758 for vect_pattern_recog. */
4759
4760 static void
4761 vect_pattern_recog_1 (vect_recog_func *recog_func, stmt_vec_info stmt_info)
4762 {
4763 vec_info *vinfo = stmt_info->vinfo;
4764 gimple *pattern_stmt;
4765 loop_vec_info loop_vinfo;
4766 tree pattern_vectype;
4767
4768 /* If this statement has already been replaced with pattern statements,
4769 leave the original statement alone, since the first match wins.
4770 Instead try to match against the definition statements that feed
4771 the main pattern statement. */
4772 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
4773 {
4774 gimple_stmt_iterator gsi;
4775 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4776 !gsi_end_p (gsi); gsi_next (&gsi))
4777 vect_pattern_recog_1 (recog_func, vinfo->lookup_stmt (gsi_stmt (gsi)));
4778 return;
4779 }
4780
4781 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4782 pattern_stmt = recog_func->fn (stmt_info, &pattern_vectype);
4783 if (!pattern_stmt)
4784 {
4785 /* Clear any half-formed pattern definition sequence. */
4786 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
4787 return;
4788 }
4789
4790 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4791 gcc_assert (pattern_vectype);
4792
4793 /* Found a vectorizable pattern. */
4794 if (dump_enabled_p ())
4795 dump_printf_loc (MSG_NOTE, vect_location,
4796 "%s pattern recognized: %G",
4797 recog_func->name, pattern_stmt);
4798
4799 /* Mark the stmts that are involved in the pattern. */
4800 vect_mark_pattern_stmts (stmt_info, pattern_stmt, pattern_vectype);
4801
4802 /* Patterns cannot be vectorized using SLP, because they change the order of
4803 computation. */
4804 if (loop_vinfo)
4805 {
4806 unsigned ix, ix2;
4807 stmt_vec_info *elem_ptr;
4808 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
4809 elem_ptr, *elem_ptr == stmt_info);
4810 }
4811 }
4812
4813
4814 /* Function vect_pattern_recog
4815
4816 Input:
4817 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
4818 computation idioms.
4819
4820 Output - for each computation idiom that is detected we create a new stmt
4821 that provides the same functionality and that can be vectorized. We
4822 also record some information in the stmt_vec_info of the relevant
4823 stmts, as explained below:
4824
4825 At the entry to this function we have the following stmts, with the
4826 following initial value in the STMT_VINFO fields:
4827
4828 stmt in_pattern_p related_stmt vec_stmt
4829 S1: a_i = .... - - -
4830 S2: a_2 = ..use(a_i).. - - -
4831 S3: a_1 = ..use(a_2).. - - -
4832 S4: a_0 = ..use(a_1).. - - -
4833 S5: ... = ..use(a_0).. - - -
4834
4835 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4836 represented by a single stmt. We then:
4837 - create a new stmt S6 equivalent to the pattern (the stmt is not
4838 inserted into the code)
4839 - fill in the STMT_VINFO fields as follows:
4840
4841 in_pattern_p related_stmt vec_stmt
4842 S1: a_i = .... - - -
4843 S2: a_2 = ..use(a_i).. - - -
4844 S3: a_1 = ..use(a_2).. - - -
4845 S4: a_0 = ..use(a_1).. true S6 -
4846 '---> S6: a_new = .... - S4 -
4847 S5: ... = ..use(a_0).. - - -
4848
4849 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4850 to each other through the RELATED_STMT field).
4851
4852 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4853 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4854 remain irrelevant unless used by stmts other than S4.
4855
4856 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4857 (because they are marked as irrelevant). It will vectorize S6, and record
4858 a pointer to the new vector stmt VS6 from S6 (as usual).
4859 S4 will be skipped, and S5 will be vectorized as usual:
4860
4861 in_pattern_p related_stmt vec_stmt
4862 S1: a_i = .... - - -
4863 S2: a_2 = ..use(a_i).. - - -
4864 S3: a_1 = ..use(a_2).. - - -
4865 > VS6: va_new = .... - - -
4866 S4: a_0 = ..use(a_1).. true S6 VS6
4867 '---> S6: a_new = .... - S4 VS6
4868 > VS5: ... = ..vuse(va_new).. - - -
4869 S5: ... = ..use(a_0).. - - -
4870
4871 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4872 elsewhere), and we'll end up with:
4873
4874 VS6: va_new = ....
4875 VS5: ... = ..vuse(va_new)..
4876
4877 In case of more than one pattern statements, e.g., widen-mult with
4878 intermediate type:
4879
4880 S1 a_t = ;
4881 S2 a_T = (TYPE) a_t;
4882 '--> S3: a_it = (interm_type) a_t;
4883 S4 prod_T = a_T * CONST;
4884 '--> S5: prod_T' = a_it w* CONST;
4885
4886 there may be other users of a_T outside the pattern. In that case S2 will
4887 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4888 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4889 be recorded in S3. */
4890
4891 void
4892 vect_pattern_recog (vec_info *vinfo)
4893 {
4894 struct loop *loop;
4895 basic_block *bbs;
4896 unsigned int nbbs;
4897 gimple_stmt_iterator si;
4898 unsigned int i, j;
4899
4900 vect_determine_precisions (vinfo);
4901
4902 DUMP_VECT_SCOPE ("vect_pattern_recog");
4903
4904 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4905 {
4906 loop = LOOP_VINFO_LOOP (loop_vinfo);
4907 bbs = LOOP_VINFO_BBS (loop_vinfo);
4908 nbbs = loop->num_nodes;
4909
4910 /* Scan through the loop stmts, applying the pattern recognition
4911 functions starting at each stmt visited: */
4912 for (i = 0; i < nbbs; i++)
4913 {
4914 basic_block bb = bbs[i];
4915 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4916 {
4917 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
4918 /* Scan over all generic vect_recog_xxx_pattern functions. */
4919 for (j = 0; j < NUM_PATTERNS; j++)
4920 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j],
4921 stmt_info);
4922 }
4923 }
4924 }
4925 else
4926 {
4927 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4928 for (si = bb_vinfo->region_begin;
4929 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4930 {
4931 gimple *stmt = gsi_stmt (si);
4932 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
4933 if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
4934 continue;
4935
4936 /* Scan over all generic vect_recog_xxx_pattern functions. */
4937 for (j = 0; j < NUM_PATTERNS; j++)
4938 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], stmt_info);
4939 }
4940 }
4941 }