]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-patterns.c
re PR fortran/49417 ([OOP] ICE on invalid CLASS component declaration)
[thirdparty/gcc.git] / gcc / tree-vect-patterns.c
CommitLineData
20f06221 1/* Analysis Utilities for Loop Vectorization.
cf835838 2 Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
20f06221
DN
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9dcd6f09 9Software Foundation; either version 3, or (at your option) any later
20f06221
DN
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
9dcd6f09
NC
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20f06221
DN
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "ggc.h"
26#include "tree.h"
20f06221
DN
27#include "target.h"
28#include "basic-block.h"
cf835838 29#include "gimple-pretty-print.h"
20f06221
DN
30#include "tree-flow.h"
31#include "tree-dump.h"
20f06221
DN
32#include "cfgloop.h"
33#include "expr.h"
34#include "optabs.h"
35#include "params.h"
36#include "tree-data-ref.h"
37#include "tree-vectorizer.h"
38#include "recog.h"
718f9c0f 39#include "diagnostic-core.h"
20f06221 40
20f06221 41/* Pattern recognition functions */
383d9c83
IR
42static gimple vect_recog_widen_sum_pattern (gimple *, tree *, tree *);
43static gimple vect_recog_widen_mult_pattern (gimple *, tree *, tree *);
44static gimple vect_recog_dot_prod_pattern (gimple *, tree *, tree *);
45static gimple vect_recog_pow_pattern (gimple *, tree *, tree *);
20f06221
DN
46static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
47 vect_recog_widen_mult_pattern,
48 vect_recog_widen_sum_pattern,
0b2229b0
RG
49 vect_recog_dot_prod_pattern,
50 vect_recog_pow_pattern};
20f06221
DN
51
52
53/* Function widened_name_p
54
55 Check whether NAME, an ssa-name used in USE_STMT,
56 is a result of a type-promotion, such that:
57 DEF_STMT: NAME = NOP (name0)
b8698a0f 58 where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
383d9c83
IR
59 If CHECK_SIGN is TRUE, check that either both types are signed or both are
60 unsigned. */
20f06221
DN
61
62static bool
383d9c83
IR
63widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
64 bool check_sign)
20f06221
DN
65{
66 tree dummy;
726a989a 67 gimple dummy_gimple;
20f06221
DN
68 loop_vec_info loop_vinfo;
69 stmt_vec_info stmt_vinfo;
20f06221
DN
70 tree type = TREE_TYPE (name);
71 tree oprnd0;
72 enum vect_def_type dt;
73 tree def;
74
75 stmt_vinfo = vinfo_for_stmt (use_stmt);
76 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
77
a70d6342 78 if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt))
20f06221
DN
79 return false;
80
8644a673
IR
81 if (dt != vect_internal_def
82 && dt != vect_external_def && dt != vect_constant_def)
20f06221
DN
83 return false;
84
85 if (! *def_stmt)
86 return false;
87
726a989a 88 if (!is_gimple_assign (*def_stmt))
20f06221
DN
89 return false;
90
726a989a 91 if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
20f06221
DN
92 return false;
93
726a989a 94 oprnd0 = gimple_assign_rhs1 (*def_stmt);
20f06221
DN
95
96 *half_type = TREE_TYPE (oprnd0);
97 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
383d9c83 98 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
20f06221
DN
99 || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
100 return false;
101
b8698a0f 102 if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy,
a70d6342 103 &dt))
20f06221
DN
104 return false;
105
20f06221
DN
106 return true;
107}
108
726a989a
RB
109/* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
110 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
111
112static tree
113vect_recog_temp_ssa_var (tree type, gimple stmt)
114{
115 tree var = create_tmp_var (type, "patt");
116
117 add_referenced_var (var);
118 var = make_ssa_name (var, stmt);
119 return var;
120}
20f06221
DN
121
122/* Function vect_recog_dot_prod_pattern
123
124 Try to find the following pattern:
125
126 type x_t, y_t;
127 TYPE1 prod;
128 TYPE2 sum = init;
129 loop:
130 sum_0 = phi <init, sum_1>
131 S1 x_t = ...
132 S2 y_t = ...
133 S3 x_T = (TYPE1) x_t;
134 S4 y_T = (TYPE1) y_t;
135 S5 prod = x_T * y_T;
136 [S6 prod = (TYPE2) prod; #optional]
137 S7 sum_1 = prod + sum_0;
138
b8698a0f
L
139 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
140 same size of 'TYPE1' or bigger. This is a special case of a reduction
20f06221 141 computation.
b8698a0f 142
20f06221
DN
143 Input:
144
145 * LAST_STMT: A stmt from which the pattern search begins. In the example,
146 when this function is called with S7, the pattern {S3,S4,S5,S6,S7} will be
147 detected.
148
149 Output:
150
151 * TYPE_IN: The type of the input arguments to the pattern.
152
153 * TYPE_OUT: The type of the output of this pattern.
154
155 * Return value: A new stmt that will be used to replace the sequence of
156 stmts that constitute the pattern. In this case it will be:
157 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
d29de1bf
DN
158
159 Note: The dot-prod idiom is a widening reduction pattern that is
160 vectorized without preserving all the intermediate results. It
161 produces only N/2 (widened) results (by summing up pairs of
162 intermediate results) rather than all N results. Therefore, we
163 cannot allow this pattern when we want to get all the results and in
164 the correct order (as is the case when this computation is in an
165 inner-loop nested in an outer-loop that us being vectorized). */
20f06221 166
726a989a 167static gimple
383d9c83 168vect_recog_dot_prod_pattern (gimple *last_stmt, tree *type_in, tree *type_out)
20f06221 169{
726a989a 170 gimple stmt;
20f06221
DN
171 tree oprnd0, oprnd1;
172 tree oprnd00, oprnd01;
383d9c83 173 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt);
20f06221 174 tree type, half_type;
726a989a 175 gimple pattern_stmt;
20f06221 176 tree prod_type;
d29de1bf
DN
177 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
178 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
f471fe72 179 tree var;
20f06221 180
383d9c83 181 if (!is_gimple_assign (*last_stmt))
20f06221
DN
182 return NULL;
183
383d9c83 184 type = gimple_expr_type (*last_stmt);
20f06221 185
b8698a0f 186 /* Look for the following pattern
20f06221
DN
187 DX = (TYPE1) X;
188 DY = (TYPE1) Y;
b8698a0f 189 DPROD = DX * DY;
20f06221
DN
190 DDPROD = (TYPE2) DPROD;
191 sum_1 = DDPROD + sum_0;
b8698a0f 192 In which
20f06221
DN
193 - DX is double the size of X
194 - DY is double the size of Y
195 - DX, DY, DPROD all have the same type
196 - sum is the same size of DPROD or bigger
197 - sum has been recognized as a reduction variable.
198
199 This is equivalent to:
200 DPROD = X w* Y; #widen mult
201 sum_1 = DPROD w+ sum_0; #widen summation
202 or
203 DPROD = X w* Y; #widen mult
204 sum_1 = DPROD + sum_0; #summation
205 */
206
207 /* Starting from LAST_STMT, follow the defs of its uses in search
208 of the above pattern. */
209
383d9c83 210 if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR)
20f06221
DN
211 return NULL;
212
213 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
214 {
215 /* Has been detected as widening-summation? */
216
217 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
726a989a
RB
218 type = gimple_expr_type (stmt);
219 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
20f06221 220 return NULL;
726a989a
RB
221 oprnd0 = gimple_assign_rhs1 (stmt);
222 oprnd1 = gimple_assign_rhs2 (stmt);
20f06221
DN
223 half_type = TREE_TYPE (oprnd0);
224 }
225 else
226 {
726a989a 227 gimple def_stmt;
20f06221
DN
228
229 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
230 return NULL;
383d9c83
IR
231 oprnd0 = gimple_assign_rhs1 (*last_stmt);
232 oprnd1 = gimple_assign_rhs2 (*last_stmt);
9600efe1
MM
233 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
234 || !types_compatible_p (TREE_TYPE (oprnd1), type))
20f06221 235 return NULL;
383d9c83 236 stmt = *last_stmt;
20f06221 237
383d9c83 238 if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
20f06221
DN
239 {
240 stmt = def_stmt;
726a989a 241 oprnd0 = gimple_assign_rhs1 (stmt);
20f06221
DN
242 }
243 else
244 half_type = type;
245 }
246
383d9c83 247 /* So far so good. Since *last_stmt was detected as a (summation) reduction,
20f06221
DN
248 we know that oprnd1 is the reduction variable (defined by a loop-header
249 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
250 Left to check that oprnd0 is defined by a (widen_)mult_expr */
251
252 prod_type = half_type;
253 stmt = SSA_NAME_DEF_STMT (oprnd0);
3cb35c12
CF
254
255 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
75264e61 256 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
3cb35c12
CF
257 return NULL;
258
b8698a0f 259 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
8665227f 260 inside the loop (in case we are analyzing an outer-loop). */
726a989a 261 if (!is_gimple_assign (stmt))
b8698a0f 262 return NULL;
20f06221
DN
263 stmt_vinfo = vinfo_for_stmt (stmt);
264 gcc_assert (stmt_vinfo);
8644a673 265 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
b3130586 266 return NULL;
726a989a 267 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
20f06221
DN
268 return NULL;
269 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
270 {
271 /* Has been detected as a widening multiplication? */
272
273 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
726a989a 274 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
20f06221
DN
275 return NULL;
276 stmt_vinfo = vinfo_for_stmt (stmt);
277 gcc_assert (stmt_vinfo);
8644a673 278 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
726a989a
RB
279 oprnd00 = gimple_assign_rhs1 (stmt);
280 oprnd01 = gimple_assign_rhs2 (stmt);
20f06221
DN
281 }
282 else
283 {
284 tree half_type0, half_type1;
726a989a 285 gimple def_stmt;
20f06221
DN
286 tree oprnd0, oprnd1;
287
726a989a
RB
288 oprnd0 = gimple_assign_rhs1 (stmt);
289 oprnd1 = gimple_assign_rhs2 (stmt);
9600efe1
MM
290 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
291 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
20f06221 292 return NULL;
383d9c83 293 if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
20f06221 294 return NULL;
726a989a 295 oprnd00 = gimple_assign_rhs1 (def_stmt);
383d9c83 296 if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
20f06221 297 return NULL;
726a989a 298 oprnd01 = gimple_assign_rhs1 (def_stmt);
9600efe1 299 if (!types_compatible_p (half_type0, half_type1))
20f06221
DN
300 return NULL;
301 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
302 return NULL;
303 }
304
305 half_type = TREE_TYPE (oprnd00);
306 *type_in = half_type;
307 *type_out = type;
b8698a0f 308
20f06221 309 /* Pattern detected. Create a stmt to be used to replace the pattern: */
726a989a 310 var = vect_recog_temp_ssa_var (type, NULL);
f471fe72
RG
311 pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
312 oprnd00, oprnd01, oprnd1);
b8698a0f 313
20f06221
DN
314 if (vect_print_dump_info (REPORT_DETAILS))
315 {
316 fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
726a989a 317 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
20f06221 318 }
d29de1bf
DN
319
320 /* We don't allow changing the order of the computation in the inner-loop
321 when doing outer-loop vectorization. */
383d9c83 322 gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt));
d29de1bf 323
726a989a 324 return pattern_stmt;
20f06221 325}
b8698a0f 326
20f06221
DN
327/* Function vect_recog_widen_mult_pattern
328
329 Try to find the following pattern:
330
331 type a_t, b_t;
332 TYPE a_T, b_T, prod_T;
333
334 S1 a_t = ;
335 S2 b_t = ;
336 S3 a_T = (TYPE) a_t;
337 S4 b_T = (TYPE) b_t;
338 S5 prod_T = a_T * b_T;
339
340 where type 'TYPE' is at least double the size of type 'type'.
341
383d9c83
IR
342 Also detect unsgigned cases:
343
344 unsigned type a_t, b_t;
345 unsigned TYPE u_prod_T;
346 TYPE a_T, b_T, prod_T;
347
348 S1 a_t = ;
349 S2 b_t = ;
350 S3 a_T = (TYPE) a_t;
351 S4 b_T = (TYPE) b_t;
352 S5 prod_T = a_T * b_T;
353 S6 u_prod_T = (unsigned TYPE) prod_T;
354
355 and multiplication by constants:
356
357 type a_t;
358 TYPE a_T, prod_T;
359
360 S1 a_t = ;
361 S3 a_T = (TYPE) a_t;
362 S5 prod_T = a_T * CONST;
363
20f06221
DN
364 Input:
365
383d9c83
IR
366 * LAST_STMT: A stmt from which the pattern search begins. In the example,
367 when this function is called with S5, the pattern {S3,S4,S5,(S6)} is
368 detected.
20f06221
DN
369
370 Output:
371
372 * TYPE_IN: The type of the input arguments to the pattern.
373
383d9c83 374 * TYPE_OUT: The type of the output of this pattern.
20f06221
DN
375
376 * Return value: A new stmt that will be used to replace the sequence of
383d9c83 377 stmts that constitute the pattern. In this case it will be:
20f06221
DN
378 WIDEN_MULT <a_t, b_t>
379*/
380
726a989a 381static gimple
383d9c83 382vect_recog_widen_mult_pattern (gimple *last_stmt,
b8698a0f 383 tree *type_in,
89d67cca 384 tree *type_out)
20f06221 385{
726a989a 386 gimple def_stmt0, def_stmt1;
89d67cca
DN
387 tree oprnd0, oprnd1;
388 tree type, half_type0, half_type1;
726a989a 389 gimple pattern_stmt;
383d9c83 390 tree vectype, vectype_out = NULL_TREE;
89d67cca 391 tree dummy;
726a989a 392 tree var;
89d67cca 393 enum tree_code dummy_code;
5d593372
IR
394 int dummy_int;
395 VEC (tree, heap) *dummy_vec;
383d9c83 396 bool op0_ok, op1_ok;
89d67cca 397
383d9c83 398 if (!is_gimple_assign (*last_stmt))
89d67cca
DN
399 return NULL;
400
383d9c83 401 type = gimple_expr_type (*last_stmt);
89d67cca
DN
402
403 /* Starting from LAST_STMT, follow the defs of its uses in search
404 of the above pattern. */
405
383d9c83 406 if (gimple_assign_rhs_code (*last_stmt) != MULT_EXPR)
89d67cca
DN
407 return NULL;
408
383d9c83
IR
409 oprnd0 = gimple_assign_rhs1 (*last_stmt);
410 oprnd1 = gimple_assign_rhs2 (*last_stmt);
9600efe1
MM
411 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
412 || !types_compatible_p (TREE_TYPE (oprnd1), type))
89d67cca
DN
413 return NULL;
414
383d9c83
IR
415 /* Check argument 0. */
416 op0_ok = widened_name_p (oprnd0, *last_stmt, &half_type0, &def_stmt0, false);
417 /* Check argument 1. */
418 op1_ok = widened_name_p (oprnd1, *last_stmt, &half_type1, &def_stmt1, false);
89d67cca 419
383d9c83
IR
420 /* In case of multiplication by a constant one of the operands may not match
421 the pattern, but not both. */
422 if (!op0_ok && !op1_ok)
89d67cca 423 return NULL;
383d9c83
IR
424
425 if (op0_ok && op1_ok)
426 {
427 oprnd0 = gimple_assign_rhs1 (def_stmt0);
428 oprnd1 = gimple_assign_rhs1 (def_stmt1);
429 }
430 else if (!op0_ok)
431 {
432 if (CONSTANT_CLASS_P (oprnd0)
433 && TREE_CODE (half_type1) == INTEGER_TYPE
434 && tree_int_cst_lt (oprnd0, TYPE_MAXVAL (half_type1))
435 && tree_int_cst_lt (TYPE_MINVAL (half_type1), oprnd0))
436 {
437 /* OPRND0 is a constant of HALF_TYPE1. */
438 half_type0 = half_type1;
439 oprnd1 = gimple_assign_rhs1 (def_stmt1);
440 }
441 else
442 return NULL;
443 }
444 else if (!op1_ok)
445 {
446 if (CONSTANT_CLASS_P (oprnd1)
447 && TREE_CODE (half_type0) == INTEGER_TYPE
448 && tree_int_cst_lt (oprnd1, TYPE_MAXVAL (half_type0))
449 && tree_int_cst_lt (TYPE_MINVAL (half_type0), oprnd1))
450 {
451 /* OPRND1 is a constant of HALF_TYPE0. */
452 half_type1 = half_type0;
453 oprnd0 = gimple_assign_rhs1 (def_stmt0);
454 }
455 else
456 return NULL;
457 }
458
459 /* Handle unsigned case. Look for
460 S6 u_prod_T = (unsigned TYPE) prod_T;
461 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
462 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
463 {
464 tree lhs = gimple_assign_lhs (*last_stmt), use_lhs;
465 imm_use_iterator imm_iter;
466 use_operand_p use_p;
467 int nuses = 0;
468 gimple use_stmt = NULL;
469 tree use_type;
470
471 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
472 return NULL;
473
474 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
475 {
476 use_stmt = USE_STMT (use_p);
477 nuses++;
478 }
479
480 if (nuses != 1 || !is_gimple_assign (use_stmt)
481 || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
482 return NULL;
483
484 use_lhs = gimple_assign_lhs (use_stmt);
485 use_type = TREE_TYPE (use_lhs);
486 if (!INTEGRAL_TYPE_P (use_type)
487 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
488 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
489 return NULL;
490
491 type = use_type;
492 *last_stmt = use_stmt;
493 }
89d67cca 494
9600efe1 495 if (!types_compatible_p (half_type0, half_type1))
89d67cca
DN
496 return NULL;
497
498 /* Pattern detected. */
499 if (vect_print_dump_info (REPORT_DETAILS))
500 fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");
501
502 /* Check target support */
503 vectype = get_vectype_for_scalar_type (half_type0);
b690cc0f 504 vectype_out = get_vectype_for_scalar_type (type);
03d3e953 505 if (!vectype
d163c4f7 506 || !vectype_out
383d9c83 507 || !supportable_widening_operation (WIDEN_MULT_EXPR, *last_stmt,
b690cc0f 508 vectype_out, vectype,
726a989a 509 &dummy, &dummy, &dummy_code,
5d593372 510 &dummy_code, &dummy_int, &dummy_vec))
89d67cca
DN
511 return NULL;
512
513 *type_in = vectype;
b690cc0f 514 *type_out = vectype_out;
89d67cca
DN
515
516 /* Pattern supported. Create a stmt to be used to replace the pattern: */
726a989a
RB
517 var = vect_recog_temp_ssa_var (type, NULL);
518 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
519 oprnd1);
520 SSA_NAME_DEF_STMT (var) = pattern_stmt;
521
89d67cca 522 if (vect_print_dump_info (REPORT_DETAILS))
726a989a
RB
523 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
524
525 return pattern_stmt;
20f06221
DN
526}
527
528
0b2229b0
RG
529/* Function vect_recog_pow_pattern
530
531 Try to find the following pattern:
532
533 x = POW (y, N);
534
535 with POW being one of pow, powf, powi, powif and N being
536 either 2 or 0.5.
537
538 Input:
539
540 * LAST_STMT: A stmt from which the pattern search begins.
541
542 Output:
543
544 * TYPE_IN: The type of the input arguments to the pattern.
545
546 * TYPE_OUT: The type of the output of this pattern.
547
548 * Return value: A new stmt that will be used to replace the sequence of
549 stmts that constitute the pattern. In this case it will be:
726a989a 550 x = x * x
0b2229b0 551 or
726a989a 552 x = sqrt (x)
0b2229b0
RG
553*/
554
726a989a 555static gimple
383d9c83 556vect_recog_pow_pattern (gimple *last_stmt, tree *type_in, tree *type_out)
0b2229b0 557{
726a989a
RB
558 tree fn, base, exp = NULL;
559 gimple stmt;
560 tree var;
0b2229b0 561
383d9c83 562 if (!is_gimple_call (*last_stmt) || gimple_call_lhs (*last_stmt) == NULL)
0b2229b0
RG
563 return NULL;
564
383d9c83 565 fn = gimple_call_fndecl (*last_stmt);
52bd463c
RG
566 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
567 return NULL;
568
0b2229b0
RG
569 switch (DECL_FUNCTION_CODE (fn))
570 {
571 case BUILT_IN_POWIF:
572 case BUILT_IN_POWI:
573 case BUILT_IN_POWF:
574 case BUILT_IN_POW:
383d9c83
IR
575 base = gimple_call_arg (*last_stmt, 0);
576 exp = gimple_call_arg (*last_stmt, 1);
0b2229b0
RG
577 if (TREE_CODE (exp) != REAL_CST
578 && TREE_CODE (exp) != INTEGER_CST)
726a989a 579 return NULL;
0b2229b0
RG
580 break;
581
726a989a
RB
582 default:
583 return NULL;
0b2229b0
RG
584 }
585
586 /* We now have a pow or powi builtin function call with a constant
587 exponent. */
588
0b2229b0
RG
589 *type_out = NULL_TREE;
590
591 /* Catch squaring. */
592 if ((host_integerp (exp, 0)
593 && tree_low_cst (exp, 0) == 2)
594 || (TREE_CODE (exp) == REAL_CST
595 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
c6b1b49b
RG
596 {
597 *type_in = TREE_TYPE (base);
726a989a
RB
598
599 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
600 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
601 SSA_NAME_DEF_STMT (var) = stmt;
602 return stmt;
c6b1b49b 603 }
0b2229b0
RG
604
605 /* Catch square root. */
606 if (TREE_CODE (exp) == REAL_CST
607 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
608 {
609 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
c6b1b49b
RG
610 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
611 if (*type_in)
612 {
726a989a
RB
613 gimple stmt = gimple_build_call (newfn, 1, base);
614 if (vectorizable_function (stmt, *type_in, *type_in)
615 != NULL_TREE)
616 {
617 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
b8698a0f 618 gimple_call_set_lhs (stmt, var);
726a989a
RB
619 return stmt;
620 }
c6b1b49b 621 }
0b2229b0
RG
622 }
623
726a989a 624 return NULL;
0b2229b0
RG
625}
626
627
20f06221
DN
628/* Function vect_recog_widen_sum_pattern
629
630 Try to find the following pattern:
631
b8698a0f 632 type x_t;
20f06221
DN
633 TYPE x_T, sum = init;
634 loop:
635 sum_0 = phi <init, sum_1>
636 S1 x_t = *p;
637 S2 x_T = (TYPE) x_t;
638 S3 sum_1 = x_T + sum_0;
639
b8698a0f 640 where type 'TYPE' is at least double the size of type 'type', i.e - we're
20f06221 641 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
917f1b7e 642 a special case of a reduction computation.
20f06221
DN
643
644 Input:
645
646 * LAST_STMT: A stmt from which the pattern search begins. In the example,
647 when this function is called with S3, the pattern {S2,S3} will be detected.
b8698a0f 648
20f06221 649 Output:
b8698a0f 650
20f06221
DN
651 * TYPE_IN: The type of the input arguments to the pattern.
652
653 * TYPE_OUT: The type of the output of this pattern.
654
655 * Return value: A new stmt that will be used to replace the sequence of
656 stmts that constitute the pattern. In this case it will be:
657 WIDEN_SUM <x_t, sum_0>
d29de1bf 658
b8698a0f 659 Note: The widening-sum idiom is a widening reduction pattern that is
d29de1bf 660 vectorized without preserving all the intermediate results. It
b8698a0f
L
661 produces only N/2 (widened) results (by summing up pairs of
662 intermediate results) rather than all N results. Therefore, we
663 cannot allow this pattern when we want to get all the results and in
664 the correct order (as is the case when this computation is in an
d29de1bf 665 inner-loop nested in an outer-loop that us being vectorized). */
20f06221 666
726a989a 667static gimple
383d9c83 668vect_recog_widen_sum_pattern (gimple *last_stmt, tree *type_in, tree *type_out)
20f06221 669{
726a989a 670 gimple stmt;
20f06221 671 tree oprnd0, oprnd1;
383d9c83 672 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt);
20f06221 673 tree type, half_type;
726a989a 674 gimple pattern_stmt;
d29de1bf
DN
675 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
676 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
726a989a 677 tree var;
20f06221 678
383d9c83 679 if (!is_gimple_assign (*last_stmt))
20f06221
DN
680 return NULL;
681
383d9c83 682 type = gimple_expr_type (*last_stmt);
20f06221
DN
683
684 /* Look for the following pattern
685 DX = (TYPE) X;
686 sum_1 = DX + sum_0;
687 In which DX is at least double the size of X, and sum_1 has been
688 recognized as a reduction variable.
689 */
690
691 /* Starting from LAST_STMT, follow the defs of its uses in search
692 of the above pattern. */
693
383d9c83 694 if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR)
20f06221
DN
695 return NULL;
696
697 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
698 return NULL;
699
383d9c83
IR
700 oprnd0 = gimple_assign_rhs1 (*last_stmt);
701 oprnd1 = gimple_assign_rhs2 (*last_stmt);
9600efe1
MM
702 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
703 || !types_compatible_p (TREE_TYPE (oprnd1), type))
20f06221
DN
704 return NULL;
705
383d9c83 706 /* So far so good. Since *last_stmt was detected as a (summation) reduction,
20f06221
DN
707 we know that oprnd1 is the reduction variable (defined by a loop-header
708 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
709 Left to check that oprnd0 is defined by a cast from type 'type' to type
710 'TYPE'. */
711
383d9c83 712 if (!widened_name_p (oprnd0, *last_stmt, &half_type, &stmt, true))
20f06221
DN
713 return NULL;
714
726a989a 715 oprnd0 = gimple_assign_rhs1 (stmt);
20f06221
DN
716 *type_in = half_type;
717 *type_out = type;
718
719 /* Pattern detected. Create a stmt to be used to replace the pattern: */
726a989a
RB
720 var = vect_recog_temp_ssa_var (type, NULL);
721 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
722 oprnd0, oprnd1);
723 SSA_NAME_DEF_STMT (var) = pattern_stmt;
724
20f06221
DN
725 if (vect_print_dump_info (REPORT_DETAILS))
726 {
727 fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
726a989a 728 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
20f06221 729 }
d29de1bf
DN
730
731 /* We don't allow changing the order of the computation in the inner-loop
732 when doing outer-loop vectorization. */
383d9c83 733 gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt));
d29de1bf 734
726a989a 735 return pattern_stmt;
20f06221
DN
736}
737
738
b8698a0f 739/* Function vect_pattern_recog_1
20f06221
DN
740
741 Input:
742 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
743 computation pattern.
744 STMT: A stmt from which the pattern search should start.
745
746 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
b8698a0f
L
747 expression that computes the same functionality and can be used to
748 replace the sequence of stmts that are involved in the pattern.
20f06221
DN
749
750 Output:
b8698a0f
L
751 This function checks if the expression returned by PATTERN_RECOG_FUNC is
752 supported in vector form by the target. We use 'TYPE_IN' to obtain the
753 relevant vector type. If 'TYPE_IN' is already a vector type, then this
20f06221
DN
754 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
755 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
756 to the available target pattern.
757
b8698a0f 758 This function also does some bookkeeping, as explained in the documentation
20f06221
DN
759 for vect_recog_pattern. */
760
761static void
762vect_pattern_recog_1 (
383d9c83 763 gimple (* vect_recog_func) (gimple *, tree *, tree *),
726a989a 764 gimple_stmt_iterator si)
20f06221 765{
726a989a 766 gimple stmt = gsi_stmt (si), pattern_stmt;
383d9c83 767 stmt_vec_info stmt_info;
20f06221 768 stmt_vec_info pattern_stmt_info;
383d9c83 769 loop_vec_info loop_vinfo;
20f06221
DN
770 tree pattern_vectype;
771 tree type_in, type_out;
20f06221 772 enum tree_code code;
b5aeb3bb
IR
773 int i;
774 gimple next;
20f06221 775
383d9c83 776 pattern_stmt = (* vect_recog_func) (&stmt, &type_in, &type_out);
726a989a 777 if (!pattern_stmt)
b8698a0f
L
778 return;
779
383d9c83
IR
780 si = gsi_for_stmt (stmt);
781 stmt_info = vinfo_for_stmt (stmt);
782 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
783
b8698a0f
L
784 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
785 {
786 /* No need to check target support (already checked by the pattern
787 recognition function). */
b690cc0f
RG
788 if (type_out)
789 gcc_assert (VECTOR_MODE_P (TYPE_MODE (type_out)));
790 pattern_vectype = type_out ? type_out : type_in;
20f06221
DN
791 }
792 else
793 {
32e8bb8e 794 enum machine_mode vec_mode;
20f06221
DN
795 enum insn_code icode;
796 optab optab;
797
798 /* Check target support */
b690cc0f
RG
799 type_in = get_vectype_for_scalar_type (type_in);
800 if (!type_in)
801 return;
802 if (type_out)
803 type_out = get_vectype_for_scalar_type (type_out);
804 else
805 type_out = type_in;
15bbc165
AO
806 if (!type_out)
807 return;
b690cc0f 808 pattern_vectype = type_out;
03d3e953 809
726a989a
RB
810 if (is_gimple_assign (pattern_stmt))
811 code = gimple_assign_rhs_code (pattern_stmt);
812 else
813 {
814 gcc_assert (is_gimple_call (pattern_stmt));
815 code = CALL_EXPR;
816 }
817
b690cc0f
RG
818 optab = optab_for_tree_code (code, type_in, optab_default);
819 vec_mode = TYPE_MODE (type_in);
20f06221 820 if (!optab
947131ba 821 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
b690cc0f 822 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
20f06221
DN
823 return;
824 }
825
826 /* Found a vectorizable pattern. */
827 if (vect_print_dump_info (REPORT_DETAILS))
828 {
b8698a0f 829 fprintf (vect_dump, "pattern recognized: ");
726a989a 830 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
20f06221 831 }
b8698a0f 832
726a989a 833 /* Mark the stmts that are involved in the pattern. */
726a989a 834 set_vinfo_for_stmt (pattern_stmt,
a70d6342 835 new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
9d5e7640 836 gimple_set_bb (pattern_stmt, gimple_bb (stmt));
726a989a 837 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
b8698a0f 838
20f06221
DN
839 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt;
840 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = STMT_VINFO_DEF_TYPE (stmt_info);
841 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
842 STMT_VINFO_IN_PATTERN_P (stmt_info) = true;
726a989a 843 STMT_VINFO_RELATED_STMT (stmt_info) = pattern_stmt;
20f06221 844
b5aeb3bb
IR
845 /* Patterns cannot be vectorized using SLP, because they change the order of
846 computation. */
ac47786e 847 FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
b5aeb3bb
IR
848 if (next == stmt)
849 VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
20f06221
DN
850}
851
852
853/* Function vect_pattern_recog
854
855 Input:
856 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
857 computation idioms.
858
9d5e7640
IR
859 Output - for each computation idiom that is detected we create a new stmt
860 that provides the same functionality and that can be vectorized. We
20f06221
DN
861 also record some information in the struct_stmt_info of the relevant
862 stmts, as explained below:
863
864 At the entry to this function we have the following stmts, with the
865 following initial value in the STMT_VINFO fields:
866
867 stmt in_pattern_p related_stmt vec_stmt
868 S1: a_i = .... - - -
869 S2: a_2 = ..use(a_i).. - - -
870 S3: a_1 = ..use(a_2).. - - -
871 S4: a_0 = ..use(a_1).. - - -
872 S5: ... = ..use(a_0).. - - -
873
874 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
9d5e7640
IR
875 represented by a single stmt. We then:
876 - create a new stmt S6 equivalent to the pattern (the stmt is not
877 inserted into the code)
20f06221
DN
878 - fill in the STMT_VINFO fields as follows:
879
880 in_pattern_p related_stmt vec_stmt
b8698a0f 881 S1: a_i = .... - - -
20f06221
DN
882 S2: a_2 = ..use(a_i).. - - -
883 S3: a_1 = ..use(a_2).. - - -
20f06221 884 S4: a_0 = ..use(a_1).. true S6 -
9d5e7640 885 '---> S6: a_new = .... - S4 -
20f06221
DN
886 S5: ... = ..use(a_0).. - - -
887
888 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
9d5e7640 889 to each other through the RELATED_STMT field).
20f06221
DN
890
891 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
892 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
893 remain irrelevant unless used by stmts other than S4.
894
895 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
9d5e7640 896 (because they are marked as irrelevant). It will vectorize S6, and record
b8698a0f 897 a pointer to the new vector stmt VS6 both from S6 (as usual), and also
9d5e7640 898 from S4. We do that so that when we get to vectorizing stmts that use the
20f06221 899 def of S4 (like S5 that uses a_0), we'll know where to take the relevant
9d5e7640 900 vector-def from. S4 will be skipped, and S5 will be vectorized as usual:
20f06221
DN
901
902 in_pattern_p related_stmt vec_stmt
903 S1: a_i = .... - - -
904 S2: a_2 = ..use(a_i).. - - -
905 S3: a_1 = ..use(a_2).. - - -
906 > VS6: va_new = .... - - -
20f06221 907 S4: a_0 = ..use(a_1).. true S6 VS6
9d5e7640 908 '---> S6: a_new = .... - S4 VS6
20f06221
DN
909 > VS5: ... = ..vuse(va_new).. - - -
910 S5: ... = ..use(a_0).. - - -
911
9d5e7640 912 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
20f06221
DN
913 elsewhere), and we'll end up with:
914
b8698a0f 915 VS6: va_new = ....
9d5e7640 916 VS5: ... = ..vuse(va_new).. */
20f06221
DN
917
918void
919vect_pattern_recog (loop_vec_info loop_vinfo)
920{
921 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
922 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
923 unsigned int nbbs = loop->num_nodes;
726a989a 924 gimple_stmt_iterator si;
20f06221 925 unsigned int i, j;
383d9c83 926 gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *);
20f06221
DN
927
928 if (vect_print_dump_info (REPORT_DETAILS))
929 fprintf (vect_dump, "=== vect_pattern_recog ===");
930
931 /* Scan through the loop stmts, applying the pattern recognition
932 functions starting at each stmt visited: */
933 for (i = 0; i < nbbs; i++)
934 {
935 basic_block bb = bbs[i];
726a989a 936 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
20f06221 937 {
20f06221
DN
938 /* Scan over all generic vect_recog_xxx_pattern functions. */
939 for (j = 0; j < NUM_PATTERNS; j++)
940 {
941 vect_recog_func_ptr = vect_vect_recog_func_ptrs[j];
942 vect_pattern_recog_1 (vect_recog_func_ptr, si);
943 }
944 }
945 }
946}