]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-slp-patterns.c
Correct a function pre/postcondition [PR102403].
[thirdparty/gcc.git] / gcc / tree-vect-slp-patterns.c
CommitLineData
3ed472af 1/* SLP - Pattern matcher on SLP trees
99dee823 2 Copyright (C) 2020-2021 Free Software Foundation, Inc.
3ed472af
TC
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "backend.h"
24#include "target.h"
25#include "rtl.h"
26#include "tree.h"
27#include "gimple.h"
28#include "tree-pass.h"
29#include "ssa.h"
30#include "optabs-tree.h"
31#include "insn-config.h"
32#include "recog.h" /* FIXME: for insn_data */
33#include "fold-const.h"
34#include "stor-layout.h"
35#include "gimple-iterator.h"
36#include "cfgloop.h"
37#include "tree-vectorizer.h"
38#include "langhooks.h"
39#include "gimple-walk.h"
40#include "dbgcnt.h"
41#include "tree-vector-builder.h"
42#include "vec-perm-indices.h"
43#include "gimple-fold.h"
44#include "internal-fn.h"
45
46/* SLP Pattern matching mechanism.
47
48 This extension to the SLP vectorizer allows one to transform the generated SLP
49 tree based on any pattern. The difference between this and the normal vect
50 pattern matcher is that unlike the former, this matcher allows you to match
51 with instructions that do not belong to the same SSA dominator graph.
52
53 The only requirement that this pattern matcher has is that you are only
54 only allowed to either match an entire group or none.
55
56 The pattern matcher currently only allows you to perform replacements to
57 internal functions.
58
59 Once the patterns are matched it is one way, these cannot be undone. It is
60 currently not supported to match patterns recursively.
61
62 To add a new pattern, implement the vect_pattern class and add the type to
63 slp_patterns.
64
65*/
66
67/*******************************************************************************
68 * vect_pattern class
69 ******************************************************************************/
70
71/* Default implementation of recognize that performs matching, validation and
72 replacement of nodes but that can be overriden if required. */
73
74static bool
75vect_pattern_validate_optab (internal_fn ifn, slp_tree node)
76{
77 tree vectype = SLP_TREE_VECTYPE (node);
78 if (ifn == IFN_LAST || !vectype)
79 return false;
80
81 if (dump_enabled_p ())
82 dump_printf_loc (MSG_NOTE, vect_location,
83 "Found %s pattern in SLP tree\n",
84 internal_fn_name (ifn));
85
86 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
87 {
88 if (dump_enabled_p ())
89 dump_printf_loc (MSG_NOTE, vect_location,
90 "Target supports %s vectorization with mode %T\n",
91 internal_fn_name (ifn), vectype);
92 }
93 else
94 {
95 if (dump_enabled_p ())
96 {
97 if (!vectype)
98 dump_printf_loc (MSG_NOTE, vect_location,
99 "Target does not support vector type for %T\n",
100 SLP_TREE_DEF_TYPE (node));
101 else
102 dump_printf_loc (MSG_NOTE, vect_location,
103 "Target does not support %s for vector type "
104 "%T\n", internal_fn_name (ifn), vectype);
105 }
106 return false;
107 }
108 return true;
109}
110
111/*******************************************************************************
112 * General helper types
113 ******************************************************************************/
114
115/* The COMPLEX_OPERATION enum denotes the possible pair of operations that can
116 be matched when looking for expressions that we are interested matching for
117 complex numbers addition and mla. */
118
119typedef enum _complex_operation : unsigned {
120 PLUS_PLUS,
121 MINUS_PLUS,
122 PLUS_MINUS,
123 MULT_MULT,
124 CMPLX_NONE
125} complex_operation_t;
126
127/*******************************************************************************
128 * General helper functions
129 ******************************************************************************/
130
131/* Helper function of linear_loads_p that checks to see if the load permutation
132 is sequential and in monotonically increasing order of loads with no gaps.
133*/
134
135static inline complex_perm_kinds_t
136is_linear_load_p (load_permutation_t loads)
137{
138 if (loads.length() == 0)
139 return PERM_UNKNOWN;
140
141 unsigned load, i;
142 complex_perm_kinds_t candidates[4]
39666d2b 143 = { PERM_ODDODD
3ed472af 144 , PERM_EVENEVEN
39666d2b
TC
145 , PERM_EVENODD
146 , PERM_ODDEVEN
3ed472af
TC
147 };
148
149 int valid_patterns = 4;
39666d2b 150 FOR_EACH_VEC_ELT (loads, i, load)
3ed472af 151 {
39666d2b 152 if (candidates[0] != PERM_UNKNOWN && load != 1)
3ed472af
TC
153 {
154 candidates[0] = PERM_UNKNOWN;
155 valid_patterns--;
156 }
39666d2b 157 if (candidates[1] != PERM_UNKNOWN && load != 0)
3ed472af
TC
158 {
159 candidates[1] = PERM_UNKNOWN;
160 valid_patterns--;
161 }
39666d2b 162 if (candidates[2] != PERM_UNKNOWN && load != i)
3ed472af
TC
163 {
164 candidates[2] = PERM_UNKNOWN;
165 valid_patterns--;
166 }
39666d2b
TC
167 if (candidates[3] != PERM_UNKNOWN
168 && load != (i % 2 == 0 ? i + 1 : i - 1))
3ed472af
TC
169 {
170 candidates[3] = PERM_UNKNOWN;
171 valid_patterns--;
172 }
173
174 if (valid_patterns == 0)
175 return PERM_UNKNOWN;
176 }
177
178 for (i = 0; i < sizeof(candidates); i++)
179 if (candidates[i] != PERM_UNKNOWN)
180 return candidates[i];
181
182 return PERM_UNKNOWN;
183}
184
185/* Combine complex_perm_kinds A and B into a new permute kind that describes the
186 resulting operation. */
187
188static inline complex_perm_kinds_t
189vect_merge_perms (complex_perm_kinds_t a, complex_perm_kinds_t b)
190{
191 if (a == b)
192 return a;
193
194 if (a == PERM_TOP)
195 return b;
196
197 if (b == PERM_TOP)
198 return a;
199
200 return PERM_UNKNOWN;
201}
202
203/* Check to see if all loads rooted in ROOT are linear. Linearity is
204 defined as having no gaps between values loaded. */
205
c3a2bc6d 206static complex_perm_kinds_t
3ed472af
TC
207linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, slp_tree root)
208{
209 if (!root)
c3a2bc6d 210 return PERM_UNKNOWN;
3ed472af
TC
211
212 unsigned i;
c3a2bc6d 213 complex_perm_kinds_t *tmp;
3ed472af
TC
214
215 if ((tmp = perm_cache->get (root)) != NULL)
216 return *tmp;
217
c3a2bc6d 218 complex_perm_kinds_t retval = PERM_UNKNOWN;
3ed472af
TC
219 perm_cache->put (root, retval);
220
221 /* If it's a load node, then just read the load permute. */
222 if (SLP_TREE_LOAD_PERMUTATION (root).exists ())
223 {
c3a2bc6d 224 retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root));
3ed472af
TC
225 perm_cache->put (root, retval);
226 return retval;
227 }
228 else if (SLP_TREE_DEF_TYPE (root) != vect_internal_def)
229 {
c3a2bc6d 230 retval = PERM_TOP;
bd4298e1 231 perm_cache->put (root, retval);
3ed472af
TC
232 return retval;
233 }
234
3ed472af
TC
235 complex_perm_kinds_t kind = PERM_TOP;
236
237 slp_tree child;
238 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, child)
239 {
c3a2bc6d
TC
240 complex_perm_kinds_t res = linear_loads_p (perm_cache, child);
241 kind = vect_merge_perms (kind, res);
159b0bd9 242 /* Unknown and Top are not valid on blends as they produce no permute. */
c3a2bc6d 243 retval = kind;
159b0bd9 244 if (kind == PERM_UNKNOWN || kind == PERM_TOP)
3ed472af 245 return retval;
3ed472af
TC
246 }
247
c3a2bc6d 248 retval = kind;
3ed472af
TC
249
250 perm_cache->put (root, retval);
251 return retval;
252}
253
254
255/* This function attempts to make a node rooted in NODE is linear. If the node
256 if already linear than the node itself is returned in RESULT.
257
258 If the node is not linear then a new VEC_PERM_EXPR node is created with a
259 lane permute that when applied will make the node linear. If such a
260 permute cannot be created then FALSE is returned from the function.
261
262 Here linearity is defined as having a sequential, monotically increasing
263 load position inside the load permute generated by the loads reachable from
264 NODE. */
265
266static slp_tree
267vect_build_swap_evenodd_node (slp_tree node)
268{
269 /* Attempt to linearise the permute. */
270 vec<std::pair<unsigned, unsigned> > zipped;
271 zipped.create (SLP_TREE_LANES (node));
272
273 for (unsigned x = 0; x < SLP_TREE_LANES (node); x+=2)
274 {
275 zipped.quick_push (std::make_pair (0, x+1));
276 zipped.quick_push (std::make_pair (0, x));
277 }
278
279 /* Create the new permute node and store it instead. */
280 slp_tree vnode = vect_create_new_slp_node (1, VEC_PERM_EXPR);
281 SLP_TREE_LANE_PERMUTATION (vnode) = zipped;
282 SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (node);
283 SLP_TREE_CHILDREN (vnode).quick_push (node);
284 SLP_TREE_REF_COUNT (vnode) = 1;
285 SLP_TREE_LANES (vnode) = SLP_TREE_LANES (node);
286 SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (node);
287 SLP_TREE_REF_COUNT (node)++;
288 return vnode;
289}
290
291/* Checks to see of the expression represented by NODE is a gimple assign with
292 code CODE. */
293
294static inline bool
295vect_match_expression_p (slp_tree node, tree_code code)
296{
297 if (!node
298 || !SLP_TREE_REPRESENTATIVE (node))
299 return false;
300
301 gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node));
302 if (!is_gimple_assign (expr)
303 || gimple_assign_rhs_code (expr) != code)
304 return false;
305
306 return true;
307}
308
31fac318
TC
309/* Checks to see if the expression represented by NODE is a call to the internal
310 function FN. */
311
312static inline bool
313vect_match_call_p (slp_tree node, internal_fn fn)
314{
315 if (!node
316 || !SLP_TREE_REPRESENTATIVE (node))
317 return false;
318
319 gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node));
320 if (!expr
321 || !gimple_call_internal_p (expr, fn))
322 return false;
323
324 return true;
325}
326
3ed472af
TC
327/* Check if the given lane permute in PERMUTES matches an alternating sequence
328 of {even odd even odd ...}. This to account for unrolled loops. Further
329 mode there resulting permute must be linear. */
330
331static inline bool
332vect_check_evenodd_blend (lane_permutation_t &permutes,
333 unsigned even, unsigned odd)
334{
9c68e2ab
TC
335 if (permutes.length () == 0
336 || permutes.length () % 2 != 0)
3ed472af
TC
337 return false;
338
339 unsigned val[2] = {even, odd};
340 unsigned seed = 0;
341 for (unsigned i = 0; i < permutes.length (); i++)
342 if (permutes[i].first != val[i % 2]
343 || permutes[i].second != seed++)
344 return false;
345
346 return true;
347}
348
349/* This function will match the two gimple expressions representing NODE1 and
350 NODE2 in parallel and returns the pair operation that represents the two
351 expressions in the two statements.
352
353 If match is successful then the corresponding complex_operation is
354 returned and the arguments to the two matched operations are returned in OPS.
355
356 If TWO_OPERANDS it is expected that the LANES of the parent VEC_PERM select
357 from the two nodes alternatingly.
358
359 If unsuccessful then CMPLX_NONE is returned and OPS is untouched.
360
361 e.g. the following gimple statements
362
363 stmt 0 _39 = _37 + _12;
364 stmt 1 _6 = _38 - _36;
365
366 will return PLUS_MINUS along with OPS containing {_37, _12, _38, _36}.
367*/
368
369static complex_operation_t
370vect_detect_pair_op (slp_tree node1, slp_tree node2, lane_permutation_t &lanes,
371 bool two_operands = true, vec<slp_tree> *ops = NULL)
372{
373 complex_operation_t result = CMPLX_NONE;
374
375 if (vect_match_expression_p (node1, MINUS_EXPR)
376 && vect_match_expression_p (node2, PLUS_EXPR)
377 && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
378 result = MINUS_PLUS;
379 else if (vect_match_expression_p (node1, PLUS_EXPR)
380 && vect_match_expression_p (node2, MINUS_EXPR)
381 && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
382 result = PLUS_MINUS;
383 else if (vect_match_expression_p (node1, PLUS_EXPR)
384 && vect_match_expression_p (node2, PLUS_EXPR))
385 result = PLUS_PLUS;
386 else if (vect_match_expression_p (node1, MULT_EXPR)
387 && vect_match_expression_p (node2, MULT_EXPR))
388 result = MULT_MULT;
389
390 if (result != CMPLX_NONE && ops != NULL)
391 {
5296bd57
TC
392 ops->safe_push (node1);
393 ops->safe_push (node2);
3ed472af
TC
394 }
395 return result;
396}
397
398/* Overload of vect_detect_pair_op that matches against the representative
399 statements in the children of NODE. It is expected that NODE has exactly
400 two children and when TWO_OPERANDS then NODE must be a VEC_PERM. */
401
402static complex_operation_t
403vect_detect_pair_op (slp_tree node, bool two_operands = true,
404 vec<slp_tree> *ops = NULL)
405{
406 if (!two_operands && SLP_TREE_CODE (node) == VEC_PERM_EXPR)
407 return CMPLX_NONE;
408
409 if (SLP_TREE_CHILDREN (node).length () != 2)
410 return CMPLX_NONE;
411
412 vec<slp_tree> children = SLP_TREE_CHILDREN (node);
413 lane_permutation_t &lanes = SLP_TREE_LANE_PERMUTATION (node);
414
415 return vect_detect_pair_op (children[0], children[1], lanes, two_operands,
416 ops);
417}
418
419/*******************************************************************************
420 * complex_pattern class
421 ******************************************************************************/
422
423/* SLP Complex Numbers pattern matching.
424
425 As an example, the following simple loop:
426
427 double a[restrict N]; double b[restrict N]; double c[restrict N];
428
429 for (int i=0; i < N; i+=2)
430 {
431 c[i] = a[i] - b[i+1];
432 c[i+1] = a[i+1] + b[i];
433 }
434
435 which represents a complex addition on with a rotation of 90* around the
436 argand plane. i.e. if `a` and `b` were complex numbers then this would be the
437 same as `a + (b * I)`.
438
439 Here the expressions for `c[i]` and `c[i+1]` are independent but have to be
440 both recognized in order for the pattern to work. As an SLP tree this is
441 represented as
442
443 +--------------------------------+
444 | stmt 0 *_9 = _10; |
445 | stmt 1 *_15 = _16; |
446 +--------------------------------+
447 |
448 |
449 v
450 +--------------------------------+
451 | stmt 0 _10 = _4 - _8; |
452 | stmt 1 _16 = _12 + _14; |
453 | lane permutation { 0[0] 1[1] } |
454 +--------------------------------+
455 | |
456 | |
457 | |
458 +-----+ | | +-----+
459 | | | | | |
460 +-----| { } |<-----+ +----->| { } --------+
461 | | | +------------------| | |
462 | +-----+ | +-----+ |
463 | | | |
464 | | | |
465 | +------|------------------+ |
466 | | | |
467 v v v v
468 +--------------------------+ +--------------------------------+
469 | stmt 0 _8 = *_7; | | stmt 0 _4 = *_3; |
470 | stmt 1 _14 = *_13; | | stmt 1 _12 = *_11; |
471 | load permutation { 1 0 } | | load permutation { 0 1 } |
472 +--------------------------+ +--------------------------------+
473
474 The pattern matcher allows you to replace both statements 0 and 1 or none at
475 all. Because this operation is a two operands operation the actual nodes
476 being replaced are those in the { } nodes. The actual scalar statements
477 themselves are not replaced or used during the matching but instead the
478 SLP_TREE_REPRESENTATIVE statements are inspected. You are also allowed to
479 replace and match on any number of nodes.
480
481 Because the pattern matcher matches on the representative statement for the
482 SLP node the case of two_operators it allows you to match the children of the
483 node. This is done using the method `recognize ()`.
484
485*/
486
487/* The complex_pattern class contains common code for pattern matchers that work
488 on complex numbers. These provide functionality to allow de-construction and
489 validation of sequences depicting/transforming REAL and IMAG pairs. */
490
491class complex_pattern : public vect_pattern
492{
493 protected:
494 auto_vec<slp_tree> m_workset;
495 complex_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
496 : vect_pattern (node, m_ops, ifn)
497 {
498 this->m_workset.safe_push (*node);
499 }
500
501 public:
502 void build (vec_info *);
503
504 static internal_fn
0c18faac 505 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
3ed472af
TC
506 vec<slp_tree> *);
507};
508
509/* Create a replacement pattern statement for each node in m_node and inserts
510 the new statement into m_node as the new representative statement. The old
511 statement is marked as being in a pattern defined by the new statement. The
512 statement is created as call to internal function IFN with m_num_args
513 arguments.
514
515 Futhermore the new pattern is also added to the vectorization information
516 structure VINFO and the old statement STMT_INFO is marked as unused while
517 the new statement is marked as used and the number of SLP uses of the new
518 statement is incremented.
519
520 The newly created SLP nodes are marked as SLP only and will be dissolved
521 if SLP is aborted.
522
523 The newly created gimple call is returned and the BB remains unchanged.
524
525 This default method is designed to only match against simple operands where
526 all the input and output types are the same.
527*/
528
529void
530complex_pattern::build (vec_info *vinfo)
531{
532 stmt_vec_info stmt_info;
533
534 auto_vec<tree> args;
535 args.create (this->m_num_args);
536 args.quick_grow_cleared (this->m_num_args);
537 slp_tree node;
538 unsigned ix;
539 stmt_vec_info call_stmt_info;
540 gcall *call_stmt = NULL;
541
542 /* Now modify the nodes themselves. */
543 FOR_EACH_VEC_ELT (this->m_workset, ix, node)
544 {
545 /* Calculate the location of the statement in NODE to replace. */
546 stmt_info = SLP_TREE_REPRESENTATIVE (node);
374f93da
RB
547 stmt_vec_info reduc_def
548 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info));
3ed472af
TC
549 gimple* old_stmt = STMT_VINFO_STMT (stmt_info);
550 tree lhs_old_stmt = gimple_get_lhs (old_stmt);
551 tree type = TREE_TYPE (lhs_old_stmt);
552
553 /* Create the argument set for use by gimple_build_call_internal_vec. */
554 for (unsigned i = 0; i < this->m_num_args; i++)
555 args[i] = lhs_old_stmt;
556
557 /* Create the new pattern statements. */
558 call_stmt = gimple_build_call_internal_vec (this->m_ifn, args);
559 tree var = make_temp_ssa_name (type, call_stmt, "slp_patt");
560 gimple_call_set_lhs (call_stmt, var);
561 gimple_set_location (call_stmt, gimple_location (old_stmt));
562 gimple_call_set_nothrow (call_stmt, true);
563
564 /* Adjust the book-keeping for the new and old statements for use during
565 SLP. This is required to get the right VF and statement during SLP
566 analysis. These changes are created after relevancy has been set for
567 the nodes as such we need to manually update them. Any changes will be
568 undone if SLP is cancelled. */
569 call_stmt_info
570 = vinfo->add_pattern_stmt (call_stmt, stmt_info);
571
572 /* Make sure to mark the representative statement pure_slp and
374f93da 573 relevant and transfer reduction info. */
3ed472af
TC
574 STMT_VINFO_RELEVANT (call_stmt_info) = vect_used_in_scope;
575 STMT_SLP_TYPE (call_stmt_info) = pure_slp;
374f93da 576 STMT_VINFO_REDUC_DEF (call_stmt_info) = reduc_def;
3ed472af 577
a29124d2
RB
578 gimple_set_bb (call_stmt, gimple_bb (stmt_info->stmt));
579 STMT_VINFO_VECTYPE (call_stmt_info) = SLP_TREE_VECTYPE (node);
5e606ed9 580 STMT_VINFO_SLP_VECT_ONLY_PATTERN (call_stmt_info) = true;
3ed472af
TC
581
582 /* Since we are replacing all the statements in the group with the same
583 thing it doesn't really matter. So just set it every time a new stmt
584 is created. */
585 SLP_TREE_REPRESENTATIVE (node) = call_stmt_info;
586 SLP_TREE_LANE_PERMUTATION (node).release ();
587 SLP_TREE_CODE (node) = CALL_EXPR;
588 }
589}
590
591/*******************************************************************************
592 * complex_add_pattern class
593 ******************************************************************************/
594
595class complex_add_pattern : public complex_pattern
596{
597 protected:
598 complex_add_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
599 : complex_pattern (node, m_ops, ifn)
600 {
601 this->m_num_args = 2;
602 }
603
604 public:
605 void build (vec_info *);
606 static internal_fn
0c18faac 607 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
3ed472af
TC
608 vec<slp_tree> *);
609
610 static vect_pattern*
611 recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
0c18faac
TC
612
613 static vect_pattern*
614 mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
615 {
616 return new complex_add_pattern (node, m_ops, ifn);
617 }
3ed472af
TC
618};
619
620/* Perform a replacement of the detected complex add pattern with the new
621 instruction sequences. */
622
623void
624complex_add_pattern::build (vec_info *vinfo)
625{
fe701195
TC
626 SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
627
3ed472af
TC
628 slp_tree node = this->m_ops[0];
629 vec<slp_tree> children = SLP_TREE_CHILDREN (node);
630
631 /* First re-arrange the children. */
fe701195
TC
632 SLP_TREE_CHILDREN (*this->m_node)[0] = children[0];
633 SLP_TREE_CHILDREN (*this->m_node)[1] =
634 vect_build_swap_evenodd_node (children[1]);
3ed472af 635
fe701195
TC
636 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[0])++;
637 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[1])++;
0c18faac
TC
638 vect_free_slp_tree (this->m_ops[0]);
639 vect_free_slp_tree (this->m_ops[1]);
640
3ed472af
TC
641 complex_pattern::build (vinfo);
642}
643
644/* Pattern matcher for trying to match complex addition pattern in SLP tree.
645
646 If no match is found then IFN is set to IFN_LAST.
647 This function matches the patterns shaped as:
648
649 c[i] = a[i] - b[i+1];
650 c[i+1] = a[i+1] + b[i];
651
652 If a match occurred then TRUE is returned, else FALSE. The initial match is
653 expected to be in OP1 and the initial match operands in args0. */
654
655internal_fn
656complex_add_pattern::matches (complex_operation_t op,
657 slp_tree_to_load_perm_map_t *perm_cache,
0c18faac 658 slp_tree *node, vec<slp_tree> *ops)
3ed472af
TC
659{
660 internal_fn ifn = IFN_LAST;
661
662 /* Find the two components. Rotation in the complex plane will modify
663 the operations:
664
665 * Rotation 0: + +
666 * Rotation 90: - +
667 * Rotation 180: - -
668 * Rotation 270: + -
669
670 Rotation 0 and 180 can be handled by normal SIMD code, so we don't need
671 to care about them here. */
672 if (op == MINUS_PLUS)
673 ifn = IFN_COMPLEX_ADD_ROT90;
674 else if (op == PLUS_MINUS)
675 ifn = IFN_COMPLEX_ADD_ROT270;
676 else
677 return ifn;
678
679 /* verify that there is a permute, otherwise this isn't a pattern we
680 we support. */
681 gcc_assert (ops->length () == 2);
682
683 vec<slp_tree> children = SLP_TREE_CHILDREN ((*ops)[0]);
684
685 /* First node must be unpermuted. */
c3a2bc6d 686 if (linear_loads_p (perm_cache, children[0]) != PERM_EVENODD)
3ed472af
TC
687 return IFN_LAST;
688
689 /* Second node must be permuted. */
c3a2bc6d 690 if (linear_loads_p (perm_cache, children[1]) != PERM_ODDEVEN)
3ed472af
TC
691 return IFN_LAST;
692
0c18faac
TC
693 if (!vect_pattern_validate_optab (ifn, *node))
694 return IFN_LAST;
695
3ed472af
TC
696 return ifn;
697}
698
699/* Attempt to recognize a complex add pattern. */
700
701vect_pattern*
702complex_add_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
703 slp_tree *node)
704{
705 auto_vec<slp_tree> ops;
706 complex_operation_t op
707 = vect_detect_pair_op (*node, true, &ops);
0c18faac
TC
708 internal_fn ifn
709 = complex_add_pattern::matches (op, perm_cache, node, &ops);
710 if (ifn == IFN_LAST)
3ed472af
TC
711 return NULL;
712
713 return new complex_add_pattern (node, &ops, ifn);
714}
715
e09173d8
TC
716/*******************************************************************************
717 * complex_mul_pattern
718 ******************************************************************************/
719
720/* Helper function of that looks for a match in the CHILDth child of NODE. The
721 child used is stored in RES.
722
723 If the match is successful then ARGS will contain the operands matched
724 and the complex_operation_t type is returned. If match is not successful
725 then CMPLX_NONE is returned and ARGS is left unmodified. */
726
727static inline complex_operation_t
728vect_match_call_complex_mla (slp_tree node, unsigned child,
729 vec<slp_tree> *args = NULL, slp_tree *res = NULL)
730{
731 gcc_assert (child < SLP_TREE_CHILDREN (node).length ());
732
733 slp_tree data = SLP_TREE_CHILDREN (node)[child];
734
735 if (res)
736 *res = data;
737
738 return vect_detect_pair_op (data, false, args);
739}
740
741/* Check to see if either of the trees in ARGS are a NEGATE_EXPR. If the first
742 child (args[0]) is a NEGATE_EXPR then NEG_FIRST_P is set to TRUE.
743
744 If a negate is found then the values in ARGS are reordered such that the
745 negate node is always the second one and the entry is replaced by the child
746 of the negate node. */
747
748static inline bool
a3d3e8c3 749vect_normalize_conj_loc (vec<slp_tree> &args, bool *neg_first_p = NULL)
e09173d8
TC
750{
751 gcc_assert (args.length () == 2);
752 bool neg_found = false;
753
754 if (vect_match_expression_p (args[0], NEGATE_EXPR))
755 {
756 std::swap (args[0], args[1]);
757 neg_found = true;
758 if (neg_first_p)
759 *neg_first_p = true;
760 }
761 else if (vect_match_expression_p (args[1], NEGATE_EXPR))
762 {
763 neg_found = true;
764 if (neg_first_p)
765 *neg_first_p = false;
766 }
767
768 if (neg_found)
769 args[1] = SLP_TREE_CHILDREN (args[1])[0];
770
771 return neg_found;
772}
773
774/* Helper function to check if PERM is KIND or PERM_TOP. */
775
776static inline bool
c3a2bc6d 777is_eq_or_top (complex_perm_kinds_t perm, complex_perm_kinds_t kind)
e09173d8 778{
c3a2bc6d 779 return perm == kind || perm == PERM_TOP;
e09173d8
TC
780}
781
782/* Helper function that checks to see if LEFT_OP and RIGHT_OP are both MULT_EXPR
783 nodes but also that they represent an operation that is either a complex
784 multiplication or a complex multiplication by conjugated value.
785
786 Of the negation is expected to be in the first half of the tree (As required
787 by an FMS pattern) then NEG_FIRST is true. If the operation is a conjugate
788 operation then CONJ_FIRST_OPERAND is set to indicate whether the first or
789 second operand contains the conjugate operation. */
790
791static inline bool
792vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache,
00dcc88a
MS
793 const vec<slp_tree> &left_op,
794 const vec<slp_tree> &right_op,
e09173d8
TC
795 bool neg_first, bool *conj_first_operand,
796 bool fms)
797{
798 /* The presence of a negation indicates that we have either a conjugate or a
799 rotation. We need to distinguish which one. */
800 *conj_first_operand = false;
801 complex_perm_kinds_t kind;
802
803 /* Complex conjugates have the negation on the imaginary part of the
804 number where rotations affect the real component. So check if the
805 negation is on a dup of lane 1. */
806 if (fms)
807 {
808 /* Canonicalization for fms is not consistent. So have to test both
809 variants to be sure. This needs to be fixed in the mid-end so
810 this part can be simpler. */
c3a2bc6d 811 kind = linear_loads_p (perm_cache, right_op[0]);
478e571a 812 if (!((is_eq_or_top (linear_loads_p (perm_cache, right_op[0]), PERM_ODDODD)
e09173d8
TC
813 && is_eq_or_top (linear_loads_p (perm_cache, right_op[1]),
814 PERM_ODDEVEN))
815 || (kind == PERM_ODDEVEN
816 && is_eq_or_top (linear_loads_p (perm_cache, right_op[1]),
817 PERM_ODDODD))))
818 return false;
819 }
820 else
821 {
c3a2bc6d 822 if (linear_loads_p (perm_cache, right_op[1]) != PERM_ODDODD
e09173d8
TC
823 && !is_eq_or_top (linear_loads_p (perm_cache, right_op[0]),
824 PERM_ODDEVEN))
825 return false;
826 }
827
828 /* Deal with differences in indexes. */
829 int index1 = fms ? 1 : 0;
830 int index2 = fms ? 0 : 1;
831
832 /* Check if the conjugate is on the second first or second operand. The
833 order of the node with the conjugate value determines this, and the dup
834 node must be one of lane 0 of the same DR as the neg node. */
c3a2bc6d 835 kind = linear_loads_p (perm_cache, left_op[index1]);
e09173d8
TC
836 if (kind == PERM_TOP)
837 {
c3a2bc6d 838 if (linear_loads_p (perm_cache, left_op[index2]) == PERM_EVENODD)
e09173d8
TC
839 return true;
840 }
841 else if (kind == PERM_EVENODD)
842 {
c3a2bc6d 843 if ((kind = linear_loads_p (perm_cache, left_op[index2])) == PERM_EVENODD)
e09173d8 844 return false;
478e571a 845 return true;
e09173d8
TC
846 }
847 else if (!neg_first)
848 *conj_first_operand = true;
849 else
850 return false;
851
852 if (kind != PERM_EVENEVEN)
853 return false;
854
855 return true;
856}
857
858/* Helper function to help distinguish between a conjugate and a rotation in a
859 complex multiplication. The operations have similar shapes but the order of
860 the load permutes are different. This function returns TRUE when the order
861 is consistent with a multiplication or multiplication by conjugated
862 operand but returns FALSE if it's a multiplication by rotated operand. */
863
864static inline bool
865vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache,
00dcc88a
MS
866 const vec<slp_tree> &op,
867 complex_perm_kinds_t permKind)
e09173d8
TC
868{
869 /* The left node is the more common case, test it first. */
870 if (!is_eq_or_top (linear_loads_p (perm_cache, op[0]), permKind))
871 {
872 if (!is_eq_or_top (linear_loads_p (perm_cache, op[1]), permKind))
873 return false;
874 }
875 return true;
876}
877
878/* This function combines two nodes containing only even and only odd lanes
879 together into a single node which contains the nodes in even/odd order
880 by using a lane permute.
881
882 The lanes in EVEN and ODD are duplicated 2 times inside the vectors.
883 So for a lanes = 4 EVEN contains {EVEN1, EVEN1, EVEN2, EVEN2}.
884
885 The tree REPRESENTATION is taken from the supplied REP along with the
886 vectype which must be the same between all three nodes.
887*/
888
889static slp_tree
890vect_build_combine_node (slp_tree even, slp_tree odd, slp_tree rep)
891{
892 vec<std::pair<unsigned, unsigned> > perm;
893 perm.create (SLP_TREE_LANES (rep));
894
895 for (unsigned x = 0; x < SLP_TREE_LANES (rep); x+=2)
896 {
897 perm.quick_push (std::make_pair (0, x));
898 perm.quick_push (std::make_pair (1, x+1));
899 }
900
901 slp_tree vnode = vect_create_new_slp_node (2, SLP_TREE_CODE (even));
902 SLP_TREE_CODE (vnode) = VEC_PERM_EXPR;
903 SLP_TREE_LANE_PERMUTATION (vnode) = perm;
904
905 SLP_TREE_CHILDREN (vnode).create (2);
906 SLP_TREE_CHILDREN (vnode).quick_push (even);
907 SLP_TREE_CHILDREN (vnode).quick_push (odd);
908 SLP_TREE_REF_COUNT (even)++;
909 SLP_TREE_REF_COUNT (odd)++;
910 SLP_TREE_REF_COUNT (vnode) = 1;
911
912 SLP_TREE_LANES (vnode) = SLP_TREE_LANES (rep);
913 gcc_assert (perm.length () == SLP_TREE_LANES (vnode));
914 /* Representation is set to that of the current node as the vectorizer
915 can't deal with VEC_PERMs with no representation, as would be the
916 case with invariants. */
917 SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (rep);
918 SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (rep);
919 return vnode;
920}
921
922class complex_mul_pattern : public complex_pattern
923{
924 protected:
925 complex_mul_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
926 : complex_pattern (node, m_ops, ifn)
927 {
928 this->m_num_args = 2;
929 }
930
931 public:
932 void build (vec_info *);
933 static internal_fn
934 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
935 vec<slp_tree> *);
936
937 static vect_pattern*
938 recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
939
940 static vect_pattern*
941 mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
942 {
943 return new complex_mul_pattern (node, m_ops, ifn);
944 }
945
946};
947
948/* Pattern matcher for trying to match complex multiply pattern in SLP tree
949 If the operation matches then IFN is set to the operation it matched
950 and the arguments to the two replacement statements are put in m_ops.
951
952 If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
953
954 This function matches the patterns shaped as:
955
956 double ax = (b[i+1] * a[i]);
957 double bx = (a[i+1] * b[i]);
958
959 c[i] = c[i] - ax;
960 c[i+1] = c[i+1] + bx;
961
962 If a match occurred then TRUE is returned, else FALSE. The initial match is
963 expected to be in OP1 and the initial match operands in args0. */
964
965internal_fn
966complex_mul_pattern::matches (complex_operation_t op,
967 slp_tree_to_load_perm_map_t *perm_cache,
968 slp_tree *node, vec<slp_tree> *ops)
969{
970 internal_fn ifn = IFN_LAST;
971
972 if (op != MINUS_PLUS)
973 return IFN_LAST;
974
975 slp_tree root = *node;
976 /* First two nodes must be a multiply. */
977 auto_vec<slp_tree> muls;
978 if (vect_match_call_complex_mla (root, 0) != MULT_MULT
979 || vect_match_call_complex_mla (root, 1, &muls) != MULT_MULT)
980 return IFN_LAST;
981
982 /* Now operand2+4 may lead to another expression. */
983 auto_vec<slp_tree> left_op, right_op;
984 left_op.safe_splice (SLP_TREE_CHILDREN (muls[0]));
985 right_op.safe_splice (SLP_TREE_CHILDREN (muls[1]));
986
c3a2bc6d 987 if (linear_loads_p (perm_cache, left_op[1]) == PERM_ODDEVEN)
e09173d8
TC
988 return IFN_LAST;
989
990 bool neg_first = false;
991 bool conj_first_operand = false;
992 bool is_neg = vect_normalize_conj_loc (right_op, &neg_first);
993
994 if (!is_neg)
995 {
996 /* A multiplication needs to multiply agains the real pair, otherwise
997 the pattern matches that of FMS. */
998 if (!vect_validate_multiplication (perm_cache, left_op, PERM_EVENEVEN)
999 || vect_normalize_conj_loc (left_op))
1000 return IFN_LAST;
1001 ifn = IFN_COMPLEX_MUL;
1002 }
1003 else if (is_neg)
1004 {
1005 if (!vect_validate_multiplication (perm_cache, left_op, right_op,
1006 neg_first, &conj_first_operand,
1007 false))
1008 return IFN_LAST;
1009
1010 ifn = IFN_COMPLEX_MUL_CONJ;
1011 }
1012
1013 if (!vect_pattern_validate_optab (ifn, *node))
1014 return IFN_LAST;
1015
1016 ops->truncate (0);
1017 ops->create (3);
1018
c3a2bc6d 1019 complex_perm_kinds_t kind = linear_loads_p (perm_cache, left_op[0]);
e09173d8
TC
1020 if (kind == PERM_EVENODD)
1021 {
1022 ops->quick_push (left_op[1]);
1023 ops->quick_push (right_op[1]);
1024 ops->quick_push (left_op[0]);
1025 }
1026 else if (kind == PERM_TOP)
1027 {
1028 ops->quick_push (left_op[1]);
1029 ops->quick_push (right_op[1]);
1030 ops->quick_push (left_op[0]);
1031 }
1032 else if (kind == PERM_EVENEVEN && !conj_first_operand)
1033 {
1034 ops->quick_push (left_op[0]);
1035 ops->quick_push (right_op[0]);
1036 ops->quick_push (left_op[1]);
1037 }
1038 else
1039 {
1040 ops->quick_push (left_op[0]);
1041 ops->quick_push (right_op[1]);
1042 ops->quick_push (left_op[1]);
1043 }
1044
1045 return ifn;
1046}
1047
1048/* Attempt to recognize a complex mul pattern. */
1049
1050vect_pattern*
1051complex_mul_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1052 slp_tree *node)
1053{
1054 auto_vec<slp_tree> ops;
1055 complex_operation_t op
1056 = vect_detect_pair_op (*node, true, &ops);
1057 internal_fn ifn
1058 = complex_mul_pattern::matches (op, perm_cache, node, &ops);
1059 if (ifn == IFN_LAST)
1060 return NULL;
1061
1062 return new complex_mul_pattern (node, &ops, ifn);
1063}
1064
1065/* Perform a replacement of the detected complex mul pattern with the new
1066 instruction sequences. */
1067
1068void
1069complex_mul_pattern::build (vec_info *vinfo)
1070{
1071 slp_tree node;
1072 unsigned i;
5296bd57
TC
1073 slp_tree newnode
1074 = vect_build_combine_node (this->m_ops[0], this->m_ops[1], *this->m_node);
1075 SLP_TREE_REF_COUNT (this->m_ops[2])++;
1076
e09173d8
TC
1077 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1078 vect_free_slp_tree (node);
1079
1080 /* First re-arrange the children. */
1081 SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
1082 SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[2];
5296bd57 1083 SLP_TREE_CHILDREN (*this->m_node)[1] = newnode;
e09173d8
TC
1084
1085 /* And then rewrite the node itself. */
1086 complex_pattern::build (vinfo);
1087}
1088
31fac318
TC
1089/*******************************************************************************
1090 * complex_fma_pattern class
1091 ******************************************************************************/
1092
1093class complex_fma_pattern : public complex_pattern
1094{
1095 protected:
1096 complex_fma_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1097 : complex_pattern (node, m_ops, ifn)
1098 {
1099 this->m_num_args = 3;
1100 }
1101
1102 public:
1103 void build (vec_info *);
1104 static internal_fn
1105 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
1106 vec<slp_tree> *);
1107
1108 static vect_pattern*
1109 recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
1110
1111 static vect_pattern*
1112 mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1113 {
1114 return new complex_fma_pattern (node, m_ops, ifn);
1115 }
1116};
1117
31fac318
TC
1118/* Pattern matcher for trying to match complex multiply and accumulate
1119 and multiply and subtract patterns in SLP tree.
1120 If the operation matches then IFN is set to the operation it matched and
1121 the arguments to the two replacement statements are put in m_ops.
1122
1123 If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
1124
1125 This function matches the patterns shaped as:
1126
1127 double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
1128 double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
1129
1130 c[i] = c[i] - ax;
1131 c[i+1] = c[i+1] + bx;
1132
1133 If a match occurred then TRUE is returned, else FALSE. The match is
1134 performed after COMPLEX_MUL which would have done the majority of the work.
1135 This function merely matches an ADD with a COMPLEX_MUL IFN. The initial
1136 match is expected to be in OP1 and the initial match operands in args0. */
1137
1138internal_fn
1139complex_fma_pattern::matches (complex_operation_t op,
1140 slp_tree_to_load_perm_map_t * /* perm_cache */,
1141 slp_tree *ref_node, vec<slp_tree> *ops)
1142{
1143 internal_fn ifn = IFN_LAST;
1144
1145 /* Find the two components. We match Complex MUL first which reduces the
1146 amount of work this pattern has to do. After that we just match the
1147 head node and we're done.:
1148
1149 * FMA: + +.
1150
1151 We need to ignore the two_operands nodes that may also match.
1152 For that we can check if they have any scalar statements and also
1153 check that it's not a permute node as we're looking for a normal
1154 PLUS_EXPR operation. */
1155 if (op != CMPLX_NONE)
1156 return IFN_LAST;
1157
1158 /* Find the two components. We match Complex MUL first which reduces the
1159 amount of work this pattern has to do. After that we just match the
1160 head node and we're done.:
1161
1162 * FMA: + + on a non-two_operands node. */
1163 slp_tree vnode = *ref_node;
1164 if (SLP_TREE_LANE_PERMUTATION (vnode).exists ()
1165 || !SLP_TREE_CHILDREN (vnode).exists ()
1166 || !vect_match_expression_p (vnode, PLUS_EXPR))
1167 return IFN_LAST;
1168
1169 slp_tree node = SLP_TREE_CHILDREN (vnode)[1];
1170
1171 if (vect_match_call_p (node, IFN_COMPLEX_MUL))
1172 ifn = IFN_COMPLEX_FMA;
1173 else if (vect_match_call_p (node, IFN_COMPLEX_MUL_CONJ))
1174 ifn = IFN_COMPLEX_FMA_CONJ;
1175 else
1176 return IFN_LAST;
1177
1178 if (!vect_pattern_validate_optab (ifn, vnode))
1179 return IFN_LAST;
1180
31fac318
TC
1181 ops->truncate (0);
1182 ops->create (3);
1183
1184 if (ifn == IFN_COMPLEX_FMA)
1185 {
1186 ops->quick_push (SLP_TREE_CHILDREN (vnode)[0]);
1187 ops->quick_push (SLP_TREE_CHILDREN (node)[1]);
1188 ops->quick_push (SLP_TREE_CHILDREN (node)[0]);
1189 }
1190 else
1191 {
1192 ops->quick_push (SLP_TREE_CHILDREN (vnode)[0]);
1193 ops->quick_push (SLP_TREE_CHILDREN (node)[0]);
1194 ops->quick_push (SLP_TREE_CHILDREN (node)[1]);
1195 }
1196
1197 return ifn;
1198}
1199
1200/* Attempt to recognize a complex mul pattern. */
1201
1202vect_pattern*
1203complex_fma_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1204 slp_tree *node)
1205{
1206 auto_vec<slp_tree> ops;
1207 complex_operation_t op
1208 = vect_detect_pair_op (*node, true, &ops);
1209 internal_fn ifn
1210 = complex_fma_pattern::matches (op, perm_cache, node, &ops);
1211 if (ifn == IFN_LAST)
1212 return NULL;
1213
1214 return new complex_fma_pattern (node, &ops, ifn);
1215}
1216
1217/* Perform a replacement of the detected complex mul pattern with the new
1218 instruction sequences. */
1219
1220void
1221complex_fma_pattern::build (vec_info *vinfo)
1222{
5296bd57
TC
1223 slp_tree node = SLP_TREE_CHILDREN (*this->m_node)[1];
1224
31fac318
TC
1225 SLP_TREE_CHILDREN (*this->m_node).release ();
1226 SLP_TREE_CHILDREN (*this->m_node).create (3);
1227 SLP_TREE_CHILDREN (*this->m_node).safe_splice (this->m_ops);
1228
5296bd57
TC
1229 SLP_TREE_REF_COUNT (this->m_ops[1])++;
1230 SLP_TREE_REF_COUNT (this->m_ops[2])++;
1231
1232 vect_free_slp_tree (node);
1233
31fac318
TC
1234 complex_pattern::build (vinfo);
1235}
1236
478e571a
TC
1237/*******************************************************************************
1238 * complex_fms_pattern class
1239 ******************************************************************************/
1240
1241class complex_fms_pattern : public complex_pattern
1242{
1243 protected:
1244 complex_fms_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1245 : complex_pattern (node, m_ops, ifn)
1246 {
1247 this->m_num_args = 3;
1248 }
1249
1250 public:
1251 void build (vec_info *);
1252 static internal_fn
1253 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
1254 vec<slp_tree> *);
1255
1256 static vect_pattern*
1257 recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
1258
1259 static vect_pattern*
1260 mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1261 {
1262 return new complex_fms_pattern (node, m_ops, ifn);
1263 }
1264};
1265
1266
1267/* Pattern matcher for trying to match complex multiply and accumulate
1268 and multiply and subtract patterns in SLP tree.
1269 If the operation matches then IFN is set to the operation it matched and
1270 the arguments to the two replacement statements are put in m_ops.
1271
1272 If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
1273
1274 This function matches the patterns shaped as:
1275
1276 double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
1277 double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
1278
1279 c[i] = c[i] - ax;
1280 c[i+1] = c[i+1] + bx;
1281
1282 If a match occurred then TRUE is returned, else FALSE. The initial match is
1283 expected to be in OP1 and the initial match operands in args0. */
1284
1285internal_fn
1286complex_fms_pattern::matches (complex_operation_t op,
1287 slp_tree_to_load_perm_map_t *perm_cache,
1288 slp_tree * ref_node, vec<slp_tree> *ops)
1289{
1290 internal_fn ifn = IFN_LAST;
1291
1292 /* Find the two components. We match Complex MUL first which reduces the
1293 amount of work this pattern has to do. After that we just match the
1294 head node and we're done.:
1295
1296 * FMS: - +. */
1297 slp_tree child = NULL;
1298
1299 /* We need to ignore the two_operands nodes that may also match,
1300 for that we can check if they have any scalar statements and also
1301 check that it's not a permute node as we're looking for a normal
1302 PLUS_EXPR operation. */
1303 if (op != PLUS_MINUS)
1304 return IFN_LAST;
1305
1306 child = SLP_TREE_CHILDREN ((*ops)[1])[1];
1307 if (vect_detect_pair_op (child) != MINUS_PLUS)
1308 return IFN_LAST;
1309
1310 /* First two nodes must be a multiply. */
1311 auto_vec<slp_tree> muls;
1312 if (vect_match_call_complex_mla (child, 0) != MULT_MULT
1313 || vect_match_call_complex_mla (child, 1, &muls) != MULT_MULT)
1314 return IFN_LAST;
1315
1316 /* Now operand2+4 may lead to another expression. */
1317 auto_vec<slp_tree> left_op, right_op;
1318 left_op.safe_splice (SLP_TREE_CHILDREN (muls[0]));
1319 right_op.safe_splice (SLP_TREE_CHILDREN (muls[1]));
1320
1321 bool is_neg = vect_normalize_conj_loc (left_op);
1322
1323 child = SLP_TREE_CHILDREN ((*ops)[1])[0];
1324 bool conj_first_operand = false;
1325 if (!vect_validate_multiplication (perm_cache, right_op, left_op, false,
1326 &conj_first_operand, true))
1327 return IFN_LAST;
1328
1329 if (!is_neg)
1330 ifn = IFN_COMPLEX_FMS;
1331 else if (is_neg)
1332 ifn = IFN_COMPLEX_FMS_CONJ;
1333
1334 if (!vect_pattern_validate_optab (ifn, *ref_node))
1335 return IFN_LAST;
1336
1337 ops->truncate (0);
1338 ops->create (4);
1339
c3a2bc6d 1340 complex_perm_kinds_t kind = linear_loads_p (perm_cache, right_op[0]);
478e571a
TC
1341 if (kind == PERM_EVENODD)
1342 {
1343 ops->quick_push (child);
1344 ops->quick_push (right_op[0]);
1345 ops->quick_push (right_op[1]);
1346 ops->quick_push (left_op[1]);
1347 }
1348 else if (kind == PERM_TOP)
1349 {
1350 ops->quick_push (child);
1351 ops->quick_push (right_op[1]);
1352 ops->quick_push (right_op[0]);
1353 ops->quick_push (left_op[0]);
1354 }
1355 else if (kind == PERM_EVENEVEN && !is_neg)
1356 {
1357 ops->quick_push (child);
1358 ops->quick_push (right_op[1]);
1359 ops->quick_push (right_op[0]);
1360 ops->quick_push (left_op[0]);
1361 }
1362 else
1363 {
1364 ops->quick_push (child);
1365 ops->quick_push (right_op[1]);
1366 ops->quick_push (right_op[0]);
1367 ops->quick_push (left_op[1]);
1368 }
1369
1370 return ifn;
1371}
1372
1373/* Attempt to recognize a complex mul pattern. */
1374
1375vect_pattern*
1376complex_fms_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1377 slp_tree *node)
1378{
1379 auto_vec<slp_tree> ops;
1380 complex_operation_t op
1381 = vect_detect_pair_op (*node, true, &ops);
1382 internal_fn ifn
1383 = complex_fms_pattern::matches (op, perm_cache, node, &ops);
1384 if (ifn == IFN_LAST)
1385 return NULL;
1386
1387 return new complex_fms_pattern (node, &ops, ifn);
1388}
1389
1390/* Perform a replacement of the detected complex mul pattern with the new
1391 instruction sequences. */
1392
1393void
1394complex_fms_pattern::build (vec_info *vinfo)
1395{
1396 slp_tree node;
1397 unsigned i;
5296bd57
TC
1398 slp_tree newnode =
1399 vect_build_combine_node (this->m_ops[2], this->m_ops[3], *this->m_node);
1400 SLP_TREE_REF_COUNT (this->m_ops[0])++;
1401 SLP_TREE_REF_COUNT (this->m_ops[1])++;
1402
478e571a
TC
1403 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1404 vect_free_slp_tree (node);
1405
1406 SLP_TREE_CHILDREN (*this->m_node).release ();
1407 SLP_TREE_CHILDREN (*this->m_node).create (3);
1408
1409 /* First re-arrange the children. */
1410 SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[0]);
1411 SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[1]);
5296bd57 1412 SLP_TREE_CHILDREN (*this->m_node).quick_push (newnode);
478e571a
TC
1413
1414 /* And then rewrite the node itself. */
1415 complex_pattern::build (vinfo);
1416}
1417
b50df1e7
TC
1418/*******************************************************************************
1419 * complex_operations_pattern class
1420 ******************************************************************************/
1421
1422/* This function combines all the existing pattern matchers above into one class
1423 that shares the functionality between them. The initial match is shared
1424 between all complex operations. */
1425
1426class complex_operations_pattern : public complex_pattern
1427{
1428 protected:
1429 complex_operations_pattern (slp_tree *node, vec<slp_tree> *m_ops,
1430 internal_fn ifn)
1431 : complex_pattern (node, m_ops, ifn)
1432 {
1433 this->m_num_args = 0;
1434 }
1435
1436 public:
1437 void build (vec_info *);
1438 static internal_fn
1439 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
1440 vec<slp_tree> *);
1441
1442 static vect_pattern*
1443 recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
1444};
1445
1446/* Dummy matches implementation for proxy object. */
1447
1448internal_fn
1449complex_operations_pattern::
1450matches (complex_operation_t /* op */,
1451 slp_tree_to_load_perm_map_t * /* perm_cache */,
1452 slp_tree * /* ref_node */, vec<slp_tree> * /* ops */)
1453{
1454 return IFN_LAST;
1455}
1456
1457/* Attempt to recognize a complex mul pattern. */
1458
1459vect_pattern*
1460complex_operations_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1461 slp_tree *node)
1462{
1463 auto_vec<slp_tree> ops;
1464 complex_operation_t op
1465 = vect_detect_pair_op (*node, true, &ops);
1466 internal_fn ifn = IFN_LAST;
1467
1468 ifn = complex_fms_pattern::matches (op, perm_cache, node, &ops);
1469 if (ifn != IFN_LAST)
1470 return complex_fms_pattern::mkInstance (node, &ops, ifn);
1471
1472 ifn = complex_mul_pattern::matches (op, perm_cache, node, &ops);
1473 if (ifn != IFN_LAST)
1474 return complex_mul_pattern::mkInstance (node, &ops, ifn);
1475
1476 ifn = complex_fma_pattern::matches (op, perm_cache, node, &ops);
1477 if (ifn != IFN_LAST)
1478 return complex_fma_pattern::mkInstance (node, &ops, ifn);
1479
1480 ifn = complex_add_pattern::matches (op, perm_cache, node, &ops);
1481 if (ifn != IFN_LAST)
1482 return complex_add_pattern::mkInstance (node, &ops, ifn);
1483
1484 return NULL;
1485}
1486
1487/* Dummy implementation of build. */
1488
1489void
1490complex_operations_pattern::build (vec_info * /* vinfo */)
1491{
1492 gcc_unreachable ();
1493}
1494
7a6c31f0
RB
1495
1496/* The addsub_pattern. */
1497
1498class addsub_pattern : public vect_pattern
1499{
1500 public:
7d810646
RB
1501 addsub_pattern (slp_tree *node, internal_fn ifn)
1502 : vect_pattern (node, NULL, ifn) {};
7a6c31f0
RB
1503
1504 void build (vec_info *);
1505
1506 static vect_pattern*
1507 recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
1508};
1509
1510vect_pattern *
1511addsub_pattern::recognize (slp_tree_to_load_perm_map_t *, slp_tree *node_)
1512{
1513 slp_tree node = *node_;
1514 if (SLP_TREE_CODE (node) != VEC_PERM_EXPR
7d810646
RB
1515 || SLP_TREE_CHILDREN (node).length () != 2
1516 || SLP_TREE_LANE_PERMUTATION (node).length () % 2)
7a6c31f0
RB
1517 return NULL;
1518
1519 /* Match a blend of a plus and a minus op with the same number of plus and
1520 minus lanes on the same operands. */
7d810646
RB
1521 unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
1522 unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
1523 if (l0 == l1)
1524 return NULL;
1525 bool l0add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0],
1526 PLUS_EXPR);
1527 if (!l0add_p
1528 && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], MINUS_EXPR))
1529 return NULL;
1530 bool l1add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1],
1531 PLUS_EXPR);
1532 if (!l1add_p
1533 && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], MINUS_EXPR))
7a6c31f0 1534 return NULL;
7d810646
RB
1535
1536 slp_tree l0node = SLP_TREE_CHILDREN (node)[l0];
1537 slp_tree l1node = SLP_TREE_CHILDREN (node)[l1];
1538 if (!((SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[0]
1539 && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[1])
1540 || (SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[1]
1541 && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[0])))
7a6c31f0
RB
1542 return NULL;
1543
1544 for (unsigned i = 0; i < SLP_TREE_LANE_PERMUTATION (node).length (); ++i)
1545 {
1546 std::pair<unsigned, unsigned> perm = SLP_TREE_LANE_PERMUTATION (node)[i];
7d810646 1547 /* It has to be alternating -, +, -,
7a6c31f0
RB
1548 While we could permute the .ADDSUB inputs and the .ADDSUB output
1549 that's only profitable over the add + sub + blend if at least
1550 one of the permute is optimized which we can't determine here. */
7d810646 1551 if (perm.first != ((i & 1) ? l1 : l0)
7a6c31f0
RB
1552 || perm.second != i)
1553 return NULL;
1554 }
1555
7d810646
RB
1556 /* Now we have either { -, +, -, + ... } (!l0add_p) or { +, -, +, - ... }
1557 (l0add_p), see whether we have FMA variants. */
1558 if (!l0add_p
1559 && vect_match_expression_p (SLP_TREE_CHILDREN (l0node)[0], MULT_EXPR))
1560 {
1561 /* (c * d) -+ a */
1562 if (vect_pattern_validate_optab (IFN_VEC_FMADDSUB, node))
1563 return new addsub_pattern (node_, IFN_VEC_FMADDSUB);
1564 }
1565 else if (l0add_p
1566 && vect_match_expression_p (SLP_TREE_CHILDREN (l1node)[0], MULT_EXPR))
1567 {
1568 /* (c * d) +- a */
1569 if (vect_pattern_validate_optab (IFN_VEC_FMSUBADD, node))
1570 return new addsub_pattern (node_, IFN_VEC_FMSUBADD);
1571 }
7a6c31f0 1572
7d810646
RB
1573 if (!l0add_p && vect_pattern_validate_optab (IFN_VEC_ADDSUB, node))
1574 return new addsub_pattern (node_, IFN_VEC_ADDSUB);
1575
1576 return NULL;
7a6c31f0
RB
1577}
1578
1579void
1580addsub_pattern::build (vec_info *vinfo)
1581{
1582 slp_tree node = *m_node;
1583
7d810646
RB
1584 unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
1585 unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
1586
1587 switch (m_ifn)
1588 {
1589 case IFN_VEC_ADDSUB:
1590 {
1591 slp_tree sub = SLP_TREE_CHILDREN (node)[l0];
1592 slp_tree add = SLP_TREE_CHILDREN (node)[l1];
1593
1594 /* Modify the blend node in-place. */
1595 SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (sub)[0];
1596 SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (sub)[1];
1597 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
1598 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
1599
1600 /* Build IFN_VEC_ADDSUB from the sub representative operands. */
1601 stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (sub);
1602 gcall *call = gimple_build_call_internal (IFN_VEC_ADDSUB, 2,
1603 gimple_assign_rhs1 (rep->stmt),
1604 gimple_assign_rhs2 (rep->stmt));
1605 gimple_call_set_lhs (call, make_ssa_name
1606 (TREE_TYPE (gimple_assign_lhs (rep->stmt))));
1607 gimple_call_set_nothrow (call, true);
1608 gimple_set_bb (call, gimple_bb (rep->stmt));
1609 stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, rep);
1610 SLP_TREE_REPRESENTATIVE (node) = new_rep;
1611 STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
1612 STMT_SLP_TYPE (new_rep) = pure_slp;
1613 STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
1614 STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
1615 STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (rep));
1616 SLP_TREE_CODE (node) = ERROR_MARK;
1617 SLP_TREE_LANE_PERMUTATION (node).release ();
1618
1619 vect_free_slp_tree (sub);
1620 vect_free_slp_tree (add);
1621 break;
1622 }
1623 case IFN_VEC_FMADDSUB:
1624 case IFN_VEC_FMSUBADD:
1625 {
1626 slp_tree sub, add;
1627 if (m_ifn == IFN_VEC_FMADDSUB)
1628 {
1629 sub = SLP_TREE_CHILDREN (node)[l0];
1630 add = SLP_TREE_CHILDREN (node)[l1];
1631 }
1632 else /* m_ifn == IFN_VEC_FMSUBADD */
1633 {
1634 sub = SLP_TREE_CHILDREN (node)[l1];
1635 add = SLP_TREE_CHILDREN (node)[l0];
1636 }
1637 slp_tree mul = SLP_TREE_CHILDREN (sub)[0];
1638 /* Modify the blend node in-place. */
1639 SLP_TREE_CHILDREN (node).safe_grow (3, true);
1640 SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (mul)[0];
1641 SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (mul)[1];
1642 SLP_TREE_CHILDREN (node)[2] = SLP_TREE_CHILDREN (sub)[1];
1643 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
1644 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
1645 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[2])++;
1646
1647 /* Build IFN_VEC_FMADDSUB from the mul/sub representative operands. */
1648 stmt_vec_info srep = SLP_TREE_REPRESENTATIVE (sub);
1649 stmt_vec_info mrep = SLP_TREE_REPRESENTATIVE (mul);
1650 gcall *call = gimple_build_call_internal (m_ifn, 3,
1651 gimple_assign_rhs1 (mrep->stmt),
1652 gimple_assign_rhs2 (mrep->stmt),
1653 gimple_assign_rhs2 (srep->stmt));
1654 gimple_call_set_lhs (call, make_ssa_name
1655 (TREE_TYPE (gimple_assign_lhs (srep->stmt))));
1656 gimple_call_set_nothrow (call, true);
1657 gimple_set_bb (call, gimple_bb (srep->stmt));
1658 stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, srep);
1659 SLP_TREE_REPRESENTATIVE (node) = new_rep;
1660 STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
1661 STMT_SLP_TYPE (new_rep) = pure_slp;
1662 STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
1663 STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
1664 STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (srep));
1665 SLP_TREE_CODE (node) = ERROR_MARK;
1666 SLP_TREE_LANE_PERMUTATION (node).release ();
1667
1668 vect_free_slp_tree (sub);
1669 vect_free_slp_tree (add);
1670 break;
1671 }
1672 default:;
1673 }
7a6c31f0
RB
1674}
1675
3ed472af
TC
1676/*******************************************************************************
1677 * Pattern matching definitions
1678 ******************************************************************************/
1679
1680#define SLP_PATTERN(x) &x::recognize
1681vect_pattern_decl_t slp_patterns[]
1682{
1683 /* For least amount of back-tracking and more efficient matching
1684 order patterns from the largest to the smallest. Especially if they
1685 overlap in what they can detect. */
1686
b50df1e7 1687 SLP_PATTERN (complex_operations_pattern),
7a6c31f0 1688 SLP_PATTERN (addsub_pattern)
3ed472af
TC
1689};
1690#undef SLP_PATTERN
1691
1692/* Set the number of SLP pattern matchers available. */
1693size_t num__slp_patterns = sizeof(slp_patterns)/sizeof(vect_pattern_decl_t);