]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-patterns.c
* diagnostic-core.h: Include bversion.h.
[thirdparty/gcc.git] / gcc / tree-vect-patterns.c
CommitLineData
4a61a337 1/* Analysis Utilities for Loop Vectorization.
ce084dfc 2 Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
4a61a337 3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
8c4c00c1 9Software Foundation; either version 3, or (at your option) any later
4a61a337 10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
8c4c00c1 18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
4a61a337 20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "ggc.h"
26#include "tree.h"
4a61a337 27#include "target.h"
28#include "basic-block.h"
ce084dfc 29#include "gimple-pretty-print.h"
4a61a337 30#include "tree-flow.h"
31#include "tree-dump.h"
4a61a337 32#include "cfgloop.h"
33#include "expr.h"
34#include "optabs.h"
35#include "params.h"
36#include "tree-data-ref.h"
37#include "tree-vectorizer.h"
38#include "recog.h"
0b205f4c 39#include "diagnostic-core.h"
4a61a337 40
334ec2d8 41/* Function prototypes */
48e1416a 42static void vect_pattern_recog_1
75a70cf9 43 (gimple (* ) (gimple, tree *, tree *), gimple_stmt_iterator);
44static bool widened_name_p (tree, gimple, tree *, gimple *);
4a61a337 45
46/* Pattern recognition functions */
75a70cf9 47static gimple vect_recog_widen_sum_pattern (gimple, tree *, tree *);
48static gimple vect_recog_widen_mult_pattern (gimple, tree *, tree *);
49static gimple vect_recog_dot_prod_pattern (gimple, tree *, tree *);
50static gimple vect_recog_pow_pattern (gimple, tree *, tree *);
4a61a337 51static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
52 vect_recog_widen_mult_pattern,
53 vect_recog_widen_sum_pattern,
c37bea13 54 vect_recog_dot_prod_pattern,
55 vect_recog_pow_pattern};
4a61a337 56
57
58/* Function widened_name_p
59
60 Check whether NAME, an ssa-name used in USE_STMT,
61 is a result of a type-promotion, such that:
62 DEF_STMT: NAME = NOP (name0)
48e1416a 63 where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
4a61a337 64*/
65
66static bool
75a70cf9 67widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt)
4a61a337 68{
69 tree dummy;
75a70cf9 70 gimple dummy_gimple;
4a61a337 71 loop_vec_info loop_vinfo;
72 stmt_vec_info stmt_vinfo;
4a61a337 73 tree type = TREE_TYPE (name);
74 tree oprnd0;
75 enum vect_def_type dt;
76 tree def;
77
78 stmt_vinfo = vinfo_for_stmt (use_stmt);
79 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
80
37545e54 81 if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt))
4a61a337 82 return false;
83
f083cd24 84 if (dt != vect_internal_def
85 && dt != vect_external_def && dt != vect_constant_def)
4a61a337 86 return false;
87
88 if (! *def_stmt)
89 return false;
90
75a70cf9 91 if (!is_gimple_assign (*def_stmt))
4a61a337 92 return false;
93
75a70cf9 94 if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
4a61a337 95 return false;
96
75a70cf9 97 oprnd0 = gimple_assign_rhs1 (*def_stmt);
4a61a337 98
99 *half_type = TREE_TYPE (oprnd0);
100 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
101 || (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type))
102 || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
103 return false;
104
48e1416a 105 if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy,
37545e54 106 &dt))
4a61a337 107 return false;
108
4a61a337 109 return true;
110}
111
75a70cf9 112/* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
113 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
114
115static tree
116vect_recog_temp_ssa_var (tree type, gimple stmt)
117{
118 tree var = create_tmp_var (type, "patt");
119
120 add_referenced_var (var);
121 var = make_ssa_name (var, stmt);
122 return var;
123}
4a61a337 124
125/* Function vect_recog_dot_prod_pattern
126
127 Try to find the following pattern:
128
129 type x_t, y_t;
130 TYPE1 prod;
131 TYPE2 sum = init;
132 loop:
133 sum_0 = phi <init, sum_1>
134 S1 x_t = ...
135 S2 y_t = ...
136 S3 x_T = (TYPE1) x_t;
137 S4 y_T = (TYPE1) y_t;
138 S5 prod = x_T * y_T;
139 [S6 prod = (TYPE2) prod; #optional]
140 S7 sum_1 = prod + sum_0;
141
48e1416a 142 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
143 same size of 'TYPE1' or bigger. This is a special case of a reduction
4a61a337 144 computation.
48e1416a 145
4a61a337 146 Input:
147
148 * LAST_STMT: A stmt from which the pattern search begins. In the example,
149 when this function is called with S7, the pattern {S3,S4,S5,S6,S7} will be
150 detected.
151
152 Output:
153
154 * TYPE_IN: The type of the input arguments to the pattern.
155
156 * TYPE_OUT: The type of the output of this pattern.
157
158 * Return value: A new stmt that will be used to replace the sequence of
159 stmts that constitute the pattern. In this case it will be:
160 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
221e9a92 161
162 Note: The dot-prod idiom is a widening reduction pattern that is
163 vectorized without preserving all the intermediate results. It
164 produces only N/2 (widened) results (by summing up pairs of
165 intermediate results) rather than all N results. Therefore, we
166 cannot allow this pattern when we want to get all the results and in
167 the correct order (as is the case when this computation is in an
168 inner-loop nested in an outer-loop that us being vectorized). */
4a61a337 169
75a70cf9 170static gimple
171vect_recog_dot_prod_pattern (gimple last_stmt, tree *type_in, tree *type_out)
4a61a337 172{
75a70cf9 173 gimple stmt;
4a61a337 174 tree oprnd0, oprnd1;
175 tree oprnd00, oprnd01;
176 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
177 tree type, half_type;
75a70cf9 178 gimple pattern_stmt;
4a61a337 179 tree prod_type;
221e9a92 180 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
181 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
75a70cf9 182 tree var, rhs;
4a61a337 183
75a70cf9 184 if (!is_gimple_assign (last_stmt))
4a61a337 185 return NULL;
186
75a70cf9 187 type = gimple_expr_type (last_stmt);
4a61a337 188
48e1416a 189 /* Look for the following pattern
4a61a337 190 DX = (TYPE1) X;
191 DY = (TYPE1) Y;
48e1416a 192 DPROD = DX * DY;
4a61a337 193 DDPROD = (TYPE2) DPROD;
194 sum_1 = DDPROD + sum_0;
48e1416a 195 In which
4a61a337 196 - DX is double the size of X
197 - DY is double the size of Y
198 - DX, DY, DPROD all have the same type
199 - sum is the same size of DPROD or bigger
200 - sum has been recognized as a reduction variable.
201
202 This is equivalent to:
203 DPROD = X w* Y; #widen mult
204 sum_1 = DPROD w+ sum_0; #widen summation
205 or
206 DPROD = X w* Y; #widen mult
207 sum_1 = DPROD + sum_0; #summation
208 */
209
210 /* Starting from LAST_STMT, follow the defs of its uses in search
211 of the above pattern. */
212
75a70cf9 213 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
4a61a337 214 return NULL;
215
216 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
217 {
218 /* Has been detected as widening-summation? */
219
220 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
75a70cf9 221 type = gimple_expr_type (stmt);
222 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
4a61a337 223 return NULL;
75a70cf9 224 oprnd0 = gimple_assign_rhs1 (stmt);
225 oprnd1 = gimple_assign_rhs2 (stmt);
4a61a337 226 half_type = TREE_TYPE (oprnd0);
227 }
228 else
229 {
75a70cf9 230 gimple def_stmt;
4a61a337 231
232 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
233 return NULL;
75a70cf9 234 oprnd0 = gimple_assign_rhs1 (last_stmt);
235 oprnd1 = gimple_assign_rhs2 (last_stmt);
1ea6a73c 236 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
237 || !types_compatible_p (TREE_TYPE (oprnd1), type))
4a61a337 238 return NULL;
239 stmt = last_stmt;
240
241 if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt))
242 {
243 stmt = def_stmt;
75a70cf9 244 oprnd0 = gimple_assign_rhs1 (stmt);
4a61a337 245 }
246 else
247 half_type = type;
248 }
249
250 /* So far so good. Since last_stmt was detected as a (summation) reduction,
251 we know that oprnd1 is the reduction variable (defined by a loop-header
252 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
253 Left to check that oprnd0 is defined by a (widen_)mult_expr */
254
255 prod_type = half_type;
256 stmt = SSA_NAME_DEF_STMT (oprnd0);
4af22cd9 257
258 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
4ba74662 259 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
4af22cd9 260 return NULL;
261
48e1416a 262 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
e3e3ee4b 263 inside the loop (in case we are analyzing an outer-loop). */
75a70cf9 264 if (!is_gimple_assign (stmt))
48e1416a 265 return NULL;
4a61a337 266 stmt_vinfo = vinfo_for_stmt (stmt);
267 gcc_assert (stmt_vinfo);
f083cd24 268 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
23d9a167 269 return NULL;
75a70cf9 270 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
4a61a337 271 return NULL;
272 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
273 {
274 /* Has been detected as a widening multiplication? */
275
276 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
75a70cf9 277 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
4a61a337 278 return NULL;
279 stmt_vinfo = vinfo_for_stmt (stmt);
280 gcc_assert (stmt_vinfo);
f083cd24 281 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
75a70cf9 282 oprnd00 = gimple_assign_rhs1 (stmt);
283 oprnd01 = gimple_assign_rhs2 (stmt);
4a61a337 284 }
285 else
286 {
287 tree half_type0, half_type1;
75a70cf9 288 gimple def_stmt;
4a61a337 289 tree oprnd0, oprnd1;
290
75a70cf9 291 oprnd0 = gimple_assign_rhs1 (stmt);
292 oprnd1 = gimple_assign_rhs2 (stmt);
1ea6a73c 293 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
294 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
4a61a337 295 return NULL;
296 if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt))
297 return NULL;
75a70cf9 298 oprnd00 = gimple_assign_rhs1 (def_stmt);
4a61a337 299 if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt))
300 return NULL;
75a70cf9 301 oprnd01 = gimple_assign_rhs1 (def_stmt);
1ea6a73c 302 if (!types_compatible_p (half_type0, half_type1))
4a61a337 303 return NULL;
304 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
305 return NULL;
306 }
307
308 half_type = TREE_TYPE (oprnd00);
309 *type_in = half_type;
310 *type_out = type;
48e1416a 311
4a61a337 312 /* Pattern detected. Create a stmt to be used to replace the pattern: */
75a70cf9 313 var = vect_recog_temp_ssa_var (type, NULL);
314 rhs = build3 (DOT_PROD_EXPR, type, oprnd00, oprnd01, oprnd1),
315 pattern_stmt = gimple_build_assign (var, rhs);
48e1416a 316
4a61a337 317 if (vect_print_dump_info (REPORT_DETAILS))
318 {
319 fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
75a70cf9 320 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
4a61a337 321 }
221e9a92 322
323 /* We don't allow changing the order of the computation in the inner-loop
324 when doing outer-loop vectorization. */
ade2ac53 325 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
221e9a92 326
75a70cf9 327 return pattern_stmt;
4a61a337 328}
48e1416a 329
4a61a337 330/* Function vect_recog_widen_mult_pattern
331
332 Try to find the following pattern:
333
334 type a_t, b_t;
335 TYPE a_T, b_T, prod_T;
336
337 S1 a_t = ;
338 S2 b_t = ;
339 S3 a_T = (TYPE) a_t;
340 S4 b_T = (TYPE) b_t;
341 S5 prod_T = a_T * b_T;
342
343 where type 'TYPE' is at least double the size of type 'type'.
344
345 Input:
346
347 * LAST_STMT: A stmt from which the pattern search begins. In the example,
348 when this function is called with S5, the pattern {S3,S4,S5} is be detected.
349
350 Output:
351
352 * TYPE_IN: The type of the input arguments to the pattern.
353
354 * TYPE_OUT: The type of the output of this pattern.
355
356 * Return value: A new stmt that will be used to replace the sequence of
357 stmts that constitute the pattern. In this case it will be:
358 WIDEN_MULT <a_t, b_t>
359*/
360
75a70cf9 361static gimple
48e1416a 362vect_recog_widen_mult_pattern (gimple last_stmt,
363 tree *type_in,
c6c91d61 364 tree *type_out)
4a61a337 365{
75a70cf9 366 gimple def_stmt0, def_stmt1;
c6c91d61 367 tree oprnd0, oprnd1;
368 tree type, half_type0, half_type1;
75a70cf9 369 gimple pattern_stmt;
b334cbba 370 tree vectype, vectype_out;
c6c91d61 371 tree dummy;
75a70cf9 372 tree var;
c6c91d61 373 enum tree_code dummy_code;
862bb3cd 374 int dummy_int;
375 VEC (tree, heap) *dummy_vec;
c6c91d61 376
75a70cf9 377 if (!is_gimple_assign (last_stmt))
c6c91d61 378 return NULL;
379
75a70cf9 380 type = gimple_expr_type (last_stmt);
c6c91d61 381
382 /* Starting from LAST_STMT, follow the defs of its uses in search
383 of the above pattern. */
384
75a70cf9 385 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
c6c91d61 386 return NULL;
387
75a70cf9 388 oprnd0 = gimple_assign_rhs1 (last_stmt);
389 oprnd1 = gimple_assign_rhs2 (last_stmt);
1ea6a73c 390 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
391 || !types_compatible_p (TREE_TYPE (oprnd1), type))
c6c91d61 392 return NULL;
393
394 /* Check argument 0 */
395 if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0))
396 return NULL;
75a70cf9 397 oprnd0 = gimple_assign_rhs1 (def_stmt0);
c6c91d61 398
399 /* Check argument 1 */
400 if (!widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1))
401 return NULL;
75a70cf9 402 oprnd1 = gimple_assign_rhs1 (def_stmt1);
c6c91d61 403
1ea6a73c 404 if (!types_compatible_p (half_type0, half_type1))
c6c91d61 405 return NULL;
406
407 /* Pattern detected. */
408 if (vect_print_dump_info (REPORT_DETAILS))
409 fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");
410
411 /* Check target support */
412 vectype = get_vectype_for_scalar_type (half_type0);
b334cbba 413 vectype_out = get_vectype_for_scalar_type (type);
f031fa03 414 if (!vectype
1548bce8 415 || !vectype_out
b334cbba 416 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
417 vectype_out, vectype,
75a70cf9 418 &dummy, &dummy, &dummy_code,
862bb3cd 419 &dummy_code, &dummy_int, &dummy_vec))
c6c91d61 420 return NULL;
421
422 *type_in = vectype;
b334cbba 423 *type_out = vectype_out;
c6c91d61 424
425 /* Pattern supported. Create a stmt to be used to replace the pattern: */
75a70cf9 426 var = vect_recog_temp_ssa_var (type, NULL);
427 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
428 oprnd1);
429 SSA_NAME_DEF_STMT (var) = pattern_stmt;
430
c6c91d61 431 if (vect_print_dump_info (REPORT_DETAILS))
75a70cf9 432 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
433
434 return pattern_stmt;
4a61a337 435}
436
437
c37bea13 438/* Function vect_recog_pow_pattern
439
440 Try to find the following pattern:
441
442 x = POW (y, N);
443
444 with POW being one of pow, powf, powi, powif and N being
445 either 2 or 0.5.
446
447 Input:
448
449 * LAST_STMT: A stmt from which the pattern search begins.
450
451 Output:
452
453 * TYPE_IN: The type of the input arguments to the pattern.
454
455 * TYPE_OUT: The type of the output of this pattern.
456
457 * Return value: A new stmt that will be used to replace the sequence of
458 stmts that constitute the pattern. In this case it will be:
75a70cf9 459 x = x * x
c37bea13 460 or
75a70cf9 461 x = sqrt (x)
c37bea13 462*/
463
75a70cf9 464static gimple
465vect_recog_pow_pattern (gimple last_stmt, tree *type_in, tree *type_out)
c37bea13 466{
75a70cf9 467 tree fn, base, exp = NULL;
468 gimple stmt;
469 tree var;
c37bea13 470
75a70cf9 471 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
c37bea13 472 return NULL;
473
75a70cf9 474 fn = gimple_call_fndecl (last_stmt);
c37bea13 475 switch (DECL_FUNCTION_CODE (fn))
476 {
477 case BUILT_IN_POWIF:
478 case BUILT_IN_POWI:
479 case BUILT_IN_POWF:
480 case BUILT_IN_POW:
75a70cf9 481 base = gimple_call_arg (last_stmt, 0);
482 exp = gimple_call_arg (last_stmt, 1);
c37bea13 483 if (TREE_CODE (exp) != REAL_CST
484 && TREE_CODE (exp) != INTEGER_CST)
75a70cf9 485 return NULL;
c37bea13 486 break;
487
75a70cf9 488 default:
489 return NULL;
c37bea13 490 }
491
492 /* We now have a pow or powi builtin function call with a constant
493 exponent. */
494
c37bea13 495 *type_out = NULL_TREE;
496
497 /* Catch squaring. */
498 if ((host_integerp (exp, 0)
499 && tree_low_cst (exp, 0) == 2)
500 || (TREE_CODE (exp) == REAL_CST
501 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
4957645d 502 {
503 *type_in = TREE_TYPE (base);
75a70cf9 504
505 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
506 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
507 SSA_NAME_DEF_STMT (var) = stmt;
508 return stmt;
4957645d 509 }
c37bea13 510
511 /* Catch square root. */
512 if (TREE_CODE (exp) == REAL_CST
513 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
514 {
515 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
4957645d 516 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
517 if (*type_in)
518 {
75a70cf9 519 gimple stmt = gimple_build_call (newfn, 1, base);
520 if (vectorizable_function (stmt, *type_in, *type_in)
521 != NULL_TREE)
522 {
523 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
48e1416a 524 gimple_call_set_lhs (stmt, var);
75a70cf9 525 return stmt;
526 }
4957645d 527 }
c37bea13 528 }
529
75a70cf9 530 return NULL;
c37bea13 531}
532
533
4a61a337 534/* Function vect_recog_widen_sum_pattern
535
536 Try to find the following pattern:
537
48e1416a 538 type x_t;
4a61a337 539 TYPE x_T, sum = init;
540 loop:
541 sum_0 = phi <init, sum_1>
542 S1 x_t = *p;
543 S2 x_T = (TYPE) x_t;
544 S3 sum_1 = x_T + sum_0;
545
48e1416a 546 where type 'TYPE' is at least double the size of type 'type', i.e - we're
4a61a337 547 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
9ca2c29a 548 a special case of a reduction computation.
4a61a337 549
550 Input:
551
552 * LAST_STMT: A stmt from which the pattern search begins. In the example,
553 when this function is called with S3, the pattern {S2,S3} will be detected.
48e1416a 554
4a61a337 555 Output:
48e1416a 556
4a61a337 557 * TYPE_IN: The type of the input arguments to the pattern.
558
559 * TYPE_OUT: The type of the output of this pattern.
560
561 * Return value: A new stmt that will be used to replace the sequence of
562 stmts that constitute the pattern. In this case it will be:
563 WIDEN_SUM <x_t, sum_0>
221e9a92 564
48e1416a 565 Note: The widening-sum idiom is a widening reduction pattern that is
221e9a92 566 vectorized without preserving all the intermediate results. It
48e1416a 567 produces only N/2 (widened) results (by summing up pairs of
568 intermediate results) rather than all N results. Therefore, we
569 cannot allow this pattern when we want to get all the results and in
570 the correct order (as is the case when this computation is in an
221e9a92 571 inner-loop nested in an outer-loop that us being vectorized). */
4a61a337 572
75a70cf9 573static gimple
574vect_recog_widen_sum_pattern (gimple last_stmt, tree *type_in, tree *type_out)
4a61a337 575{
75a70cf9 576 gimple stmt;
4a61a337 577 tree oprnd0, oprnd1;
578 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
579 tree type, half_type;
75a70cf9 580 gimple pattern_stmt;
221e9a92 581 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
582 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
75a70cf9 583 tree var;
4a61a337 584
75a70cf9 585 if (!is_gimple_assign (last_stmt))
4a61a337 586 return NULL;
587
75a70cf9 588 type = gimple_expr_type (last_stmt);
4a61a337 589
590 /* Look for the following pattern
591 DX = (TYPE) X;
592 sum_1 = DX + sum_0;
593 In which DX is at least double the size of X, and sum_1 has been
594 recognized as a reduction variable.
595 */
596
597 /* Starting from LAST_STMT, follow the defs of its uses in search
598 of the above pattern. */
599
75a70cf9 600 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
4a61a337 601 return NULL;
602
603 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
604 return NULL;
605
75a70cf9 606 oprnd0 = gimple_assign_rhs1 (last_stmt);
607 oprnd1 = gimple_assign_rhs2 (last_stmt);
1ea6a73c 608 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
609 || !types_compatible_p (TREE_TYPE (oprnd1), type))
4a61a337 610 return NULL;
611
612 /* So far so good. Since last_stmt was detected as a (summation) reduction,
613 we know that oprnd1 is the reduction variable (defined by a loop-header
614 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
615 Left to check that oprnd0 is defined by a cast from type 'type' to type
616 'TYPE'. */
617
618 if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt))
619 return NULL;
620
75a70cf9 621 oprnd0 = gimple_assign_rhs1 (stmt);
4a61a337 622 *type_in = half_type;
623 *type_out = type;
624
625 /* Pattern detected. Create a stmt to be used to replace the pattern: */
75a70cf9 626 var = vect_recog_temp_ssa_var (type, NULL);
627 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
628 oprnd0, oprnd1);
629 SSA_NAME_DEF_STMT (var) = pattern_stmt;
630
4a61a337 631 if (vect_print_dump_info (REPORT_DETAILS))
632 {
633 fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
75a70cf9 634 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
4a61a337 635 }
221e9a92 636
637 /* We don't allow changing the order of the computation in the inner-loop
638 when doing outer-loop vectorization. */
ade2ac53 639 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
221e9a92 640
75a70cf9 641 return pattern_stmt;
4a61a337 642}
643
644
48e1416a 645/* Function vect_pattern_recog_1
4a61a337 646
647 Input:
648 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
649 computation pattern.
650 STMT: A stmt from which the pattern search should start.
651
652 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
48e1416a 653 expression that computes the same functionality and can be used to
654 replace the sequence of stmts that are involved in the pattern.
4a61a337 655
656 Output:
48e1416a 657 This function checks if the expression returned by PATTERN_RECOG_FUNC is
658 supported in vector form by the target. We use 'TYPE_IN' to obtain the
659 relevant vector type. If 'TYPE_IN' is already a vector type, then this
4a61a337 660 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
661 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
662 to the available target pattern.
663
48e1416a 664 This function also does some bookkeeping, as explained in the documentation
4a61a337 665 for vect_recog_pattern. */
666
667static void
668vect_pattern_recog_1 (
75a70cf9 669 gimple (* vect_recog_func) (gimple, tree *, tree *),
670 gimple_stmt_iterator si)
4a61a337 671{
75a70cf9 672 gimple stmt = gsi_stmt (si), pattern_stmt;
4a61a337 673 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
674 stmt_vec_info pattern_stmt_info;
675 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4a61a337 676 tree pattern_vectype;
677 tree type_in, type_out;
4a61a337 678 enum tree_code code;
eefa05c8 679 int i;
680 gimple next;
4a61a337 681
75a70cf9 682 pattern_stmt = (* vect_recog_func) (stmt, &type_in, &type_out);
683 if (!pattern_stmt)
48e1416a 684 return;
685
686 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
687 {
688 /* No need to check target support (already checked by the pattern
689 recognition function). */
b334cbba 690 if (type_out)
691 gcc_assert (VECTOR_MODE_P (TYPE_MODE (type_out)));
692 pattern_vectype = type_out ? type_out : type_in;
4a61a337 693 }
694 else
695 {
8458f4ca 696 enum machine_mode vec_mode;
4a61a337 697 enum insn_code icode;
698 optab optab;
699
700 /* Check target support */
b334cbba 701 type_in = get_vectype_for_scalar_type (type_in);
702 if (!type_in)
703 return;
704 if (type_out)
705 type_out = get_vectype_for_scalar_type (type_out);
706 else
707 type_out = type_in;
ed67497f 708 if (!type_out)
709 return;
b334cbba 710 pattern_vectype = type_out;
f031fa03 711
75a70cf9 712 if (is_gimple_assign (pattern_stmt))
713 code = gimple_assign_rhs_code (pattern_stmt);
714 else
715 {
716 gcc_assert (is_gimple_call (pattern_stmt));
717 code = CALL_EXPR;
718 }
719
b334cbba 720 optab = optab_for_tree_code (code, type_in, optab_default);
721 vec_mode = TYPE_MODE (type_in);
4a61a337 722 if (!optab
d6bf3b14 723 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
b334cbba 724 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
4a61a337 725 return;
726 }
727
728 /* Found a vectorizable pattern. */
729 if (vect_print_dump_info (REPORT_DETAILS))
730 {
48e1416a 731 fprintf (vect_dump, "pattern recognized: ");
75a70cf9 732 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
4a61a337 733 }
48e1416a 734
75a70cf9 735 /* Mark the stmts that are involved in the pattern. */
736 gsi_insert_before (&si, pattern_stmt, GSI_SAME_STMT);
737 set_vinfo_for_stmt (pattern_stmt,
37545e54 738 new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
75a70cf9 739 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
48e1416a 740
4a61a337 741 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt;
742 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = STMT_VINFO_DEF_TYPE (stmt_info);
743 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
744 STMT_VINFO_IN_PATTERN_P (stmt_info) = true;
75a70cf9 745 STMT_VINFO_RELATED_STMT (stmt_info) = pattern_stmt;
4a61a337 746
eefa05c8 747 /* Patterns cannot be vectorized using SLP, because they change the order of
748 computation. */
48148244 749 FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
eefa05c8 750 if (next == stmt)
751 VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
4a61a337 752}
753
754
755/* Function vect_pattern_recog
756
757 Input:
758 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
759 computation idioms.
760
761 Output - for each computation idiom that is detected we insert a new stmt
762 that provides the same functionality and that can be vectorized. We
763 also record some information in the struct_stmt_info of the relevant
764 stmts, as explained below:
765
766 At the entry to this function we have the following stmts, with the
767 following initial value in the STMT_VINFO fields:
768
769 stmt in_pattern_p related_stmt vec_stmt
770 S1: a_i = .... - - -
771 S2: a_2 = ..use(a_i).. - - -
772 S3: a_1 = ..use(a_2).. - - -
773 S4: a_0 = ..use(a_1).. - - -
774 S5: ... = ..use(a_0).. - - -
775
776 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
777 represented by a single stmt. We then:
778 - create a new stmt S6 that will replace the pattern.
779 - insert the new stmt S6 before the last stmt in the pattern
780 - fill in the STMT_VINFO fields as follows:
781
782 in_pattern_p related_stmt vec_stmt
48e1416a 783 S1: a_i = .... - - -
4a61a337 784 S2: a_2 = ..use(a_i).. - - -
785 S3: a_1 = ..use(a_2).. - - -
786 > S6: a_new = .... - S4 -
787 S4: a_0 = ..use(a_1).. true S6 -
788 S5: ... = ..use(a_0).. - - -
789
790 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
791 to each other through the RELATED_STMT field).
792
793 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
794 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
795 remain irrelevant unless used by stmts other than S4.
796
797 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
334ec2d8 798 (because they are marked as irrelevant). It will vectorize S6, and record
48e1416a 799 a pointer to the new vector stmt VS6 both from S6 (as usual), and also
4a61a337 800 from S4. We do that so that when we get to vectorizing stmts that use the
801 def of S4 (like S5 that uses a_0), we'll know where to take the relevant
802 vector-def from. S4 will be skipped, and S5 will be vectorized as usual:
803
804 in_pattern_p related_stmt vec_stmt
805 S1: a_i = .... - - -
806 S2: a_2 = ..use(a_i).. - - -
807 S3: a_1 = ..use(a_2).. - - -
808 > VS6: va_new = .... - - -
809 S6: a_new = .... - S4 VS6
810 S4: a_0 = ..use(a_1).. true S6 VS6
811 > VS5: ... = ..vuse(va_new).. - - -
812 S5: ... = ..use(a_0).. - - -
813
814 DCE could then get rid of {S1,S2,S3,S4,S5,S6} (if their defs are not used
815 elsewhere), and we'll end up with:
816
48e1416a 817 VS6: va_new = ....
4a61a337 818 VS5: ... = ..vuse(va_new)..
819
820 If vectorization does not succeed, DCE will clean S6 away (its def is
821 not used), and we'll end up with the original sequence.
822*/
823
824void
825vect_pattern_recog (loop_vec_info loop_vinfo)
826{
827 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
828 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
829 unsigned int nbbs = loop->num_nodes;
75a70cf9 830 gimple_stmt_iterator si;
4a61a337 831 unsigned int i, j;
75a70cf9 832 gimple (* vect_recog_func_ptr) (gimple, tree *, tree *);
4a61a337 833
834 if (vect_print_dump_info (REPORT_DETAILS))
835 fprintf (vect_dump, "=== vect_pattern_recog ===");
836
837 /* Scan through the loop stmts, applying the pattern recognition
838 functions starting at each stmt visited: */
839 for (i = 0; i < nbbs; i++)
840 {
841 basic_block bb = bbs[i];
75a70cf9 842 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4a61a337 843 {
4a61a337 844 /* Scan over all generic vect_recog_xxx_pattern functions. */
845 for (j = 0; j < NUM_PATTERNS; j++)
846 {
847 vect_recog_func_ptr = vect_vect_recog_func_ptrs[j];
848 vect_pattern_recog_1 (vect_recog_func_ptr, si);
849 }
850 }
851 }
852}