]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-slp-patterns.cc
Don't build readline/libreadline.a, when --with-system-readline is supplied
[thirdparty/gcc.git] / gcc / tree-vect-slp-patterns.cc
CommitLineData
3ed472af 1/* SLP - Pattern matcher on SLP trees
7adcbafe 2 Copyright (C) 2020-2022 Free Software Foundation, Inc.
3ed472af
TC
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "backend.h"
24#include "target.h"
25#include "rtl.h"
26#include "tree.h"
27#include "gimple.h"
28#include "tree-pass.h"
29#include "ssa.h"
30#include "optabs-tree.h"
31#include "insn-config.h"
32#include "recog.h" /* FIXME: for insn_data */
33#include "fold-const.h"
34#include "stor-layout.h"
35#include "gimple-iterator.h"
36#include "cfgloop.h"
37#include "tree-vectorizer.h"
38#include "langhooks.h"
39#include "gimple-walk.h"
40#include "dbgcnt.h"
41#include "tree-vector-builder.h"
42#include "vec-perm-indices.h"
43#include "gimple-fold.h"
44#include "internal-fn.h"
45
46/* SLP Pattern matching mechanism.
47
48 This extension to the SLP vectorizer allows one to transform the generated SLP
49 tree based on any pattern. The difference between this and the normal vect
50 pattern matcher is that unlike the former, this matcher allows you to match
51 with instructions that do not belong to the same SSA dominator graph.
52
53 The only requirement that this pattern matcher has is that you are only
54 only allowed to either match an entire group or none.
55
56 The pattern matcher currently only allows you to perform replacements to
57 internal functions.
58
59 Once the patterns are matched it is one way, these cannot be undone. It is
60 currently not supported to match patterns recursively.
61
62 To add a new pattern, implement the vect_pattern class and add the type to
63 slp_patterns.
64
65*/
66
67/*******************************************************************************
68 * vect_pattern class
69 ******************************************************************************/
70
71/* Default implementation of recognize that performs matching, validation and
72 replacement of nodes but that can be overriden if required. */
73
74static bool
75vect_pattern_validate_optab (internal_fn ifn, slp_tree node)
76{
77 tree vectype = SLP_TREE_VECTYPE (node);
78 if (ifn == IFN_LAST || !vectype)
79 return false;
80
81 if (dump_enabled_p ())
82 dump_printf_loc (MSG_NOTE, vect_location,
83 "Found %s pattern in SLP tree\n",
84 internal_fn_name (ifn));
85
86 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
87 {
88 if (dump_enabled_p ())
89 dump_printf_loc (MSG_NOTE, vect_location,
90 "Target supports %s vectorization with mode %T\n",
91 internal_fn_name (ifn), vectype);
92 }
93 else
94 {
95 if (dump_enabled_p ())
96 {
97 if (!vectype)
98 dump_printf_loc (MSG_NOTE, vect_location,
953e08fd
JJ
99 "Target does not support vector type for %G\n",
100 STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node)));
3ed472af
TC
101 else
102 dump_printf_loc (MSG_NOTE, vect_location,
103 "Target does not support %s for vector type "
104 "%T\n", internal_fn_name (ifn), vectype);
105 }
106 return false;
107 }
108 return true;
109}
110
111/*******************************************************************************
112 * General helper types
113 ******************************************************************************/
114
115/* The COMPLEX_OPERATION enum denotes the possible pair of operations that can
116 be matched when looking for expressions that we are interested matching for
117 complex numbers addition and mla. */
118
119typedef enum _complex_operation : unsigned {
120 PLUS_PLUS,
121 MINUS_PLUS,
122 PLUS_MINUS,
123 MULT_MULT,
124 CMPLX_NONE
125} complex_operation_t;
126
127/*******************************************************************************
128 * General helper functions
129 ******************************************************************************/
130
131/* Helper function of linear_loads_p that checks to see if the load permutation
132 is sequential and in monotonically increasing order of loads with no gaps.
133*/
134
135static inline complex_perm_kinds_t
136is_linear_load_p (load_permutation_t loads)
137{
138 if (loads.length() == 0)
139 return PERM_UNKNOWN;
140
141 unsigned load, i;
142 complex_perm_kinds_t candidates[4]
39666d2b 143 = { PERM_ODDODD
3ed472af 144 , PERM_EVENEVEN
39666d2b
TC
145 , PERM_EVENODD
146 , PERM_ODDEVEN
3ed472af
TC
147 };
148
149 int valid_patterns = 4;
39666d2b 150 FOR_EACH_VEC_ELT (loads, i, load)
3ed472af 151 {
55d83cdf
TC
152 unsigned adj_load = load % 2;
153 if (candidates[0] != PERM_UNKNOWN && adj_load != 1)
3ed472af
TC
154 {
155 candidates[0] = PERM_UNKNOWN;
156 valid_patterns--;
157 }
55d83cdf 158 if (candidates[1] != PERM_UNKNOWN && adj_load != 0)
3ed472af
TC
159 {
160 candidates[1] = PERM_UNKNOWN;
161 valid_patterns--;
162 }
39666d2b 163 if (candidates[2] != PERM_UNKNOWN && load != i)
3ed472af
TC
164 {
165 candidates[2] = PERM_UNKNOWN;
166 valid_patterns--;
167 }
39666d2b
TC
168 if (candidates[3] != PERM_UNKNOWN
169 && load != (i % 2 == 0 ? i + 1 : i - 1))
3ed472af
TC
170 {
171 candidates[3] = PERM_UNKNOWN;
172 valid_patterns--;
173 }
174
175 if (valid_patterns == 0)
176 return PERM_UNKNOWN;
177 }
178
179 for (i = 0; i < sizeof(candidates); i++)
180 if (candidates[i] != PERM_UNKNOWN)
181 return candidates[i];
182
183 return PERM_UNKNOWN;
184}
185
186/* Combine complex_perm_kinds A and B into a new permute kind that describes the
187 resulting operation. */
188
189static inline complex_perm_kinds_t
190vect_merge_perms (complex_perm_kinds_t a, complex_perm_kinds_t b)
191{
192 if (a == b)
193 return a;
194
195 if (a == PERM_TOP)
196 return b;
197
198 if (b == PERM_TOP)
199 return a;
200
201 return PERM_UNKNOWN;
202}
203
204/* Check to see if all loads rooted in ROOT are linear. Linearity is
205 defined as having no gaps between values loaded. */
206
c3a2bc6d 207static complex_perm_kinds_t
3ed472af
TC
208linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, slp_tree root)
209{
210 if (!root)
c3a2bc6d 211 return PERM_UNKNOWN;
3ed472af
TC
212
213 unsigned i;
c3a2bc6d 214 complex_perm_kinds_t *tmp;
3ed472af
TC
215
216 if ((tmp = perm_cache->get (root)) != NULL)
217 return *tmp;
218
c3a2bc6d 219 complex_perm_kinds_t retval = PERM_UNKNOWN;
3ed472af
TC
220 perm_cache->put (root, retval);
221
222 /* If it's a load node, then just read the load permute. */
223 if (SLP_TREE_LOAD_PERMUTATION (root).exists ())
224 {
c3a2bc6d 225 retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root));
3ed472af
TC
226 perm_cache->put (root, retval);
227 return retval;
228 }
229 else if (SLP_TREE_DEF_TYPE (root) != vect_internal_def)
230 {
c3a2bc6d 231 retval = PERM_TOP;
bd4298e1 232 perm_cache->put (root, retval);
3ed472af
TC
233 return retval;
234 }
235
3ed472af
TC
236 complex_perm_kinds_t kind = PERM_TOP;
237
238 slp_tree child;
239 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, child)
240 {
c3a2bc6d
TC
241 complex_perm_kinds_t res = linear_loads_p (perm_cache, child);
242 kind = vect_merge_perms (kind, res);
159b0bd9 243 /* Unknown and Top are not valid on blends as they produce no permute. */
c3a2bc6d 244 retval = kind;
159b0bd9 245 if (kind == PERM_UNKNOWN || kind == PERM_TOP)
3ed472af 246 return retval;
3ed472af
TC
247 }
248
c3a2bc6d 249 retval = kind;
3ed472af
TC
250
251 perm_cache->put (root, retval);
252 return retval;
253}
254
255
256/* This function attempts to make a node rooted in NODE is linear. If the node
257 if already linear than the node itself is returned in RESULT.
258
259 If the node is not linear then a new VEC_PERM_EXPR node is created with a
260 lane permute that when applied will make the node linear. If such a
261 permute cannot be created then FALSE is returned from the function.
262
263 Here linearity is defined as having a sequential, monotically increasing
264 load position inside the load permute generated by the loads reachable from
265 NODE. */
266
267static slp_tree
268vect_build_swap_evenodd_node (slp_tree node)
269{
270 /* Attempt to linearise the permute. */
271 vec<std::pair<unsigned, unsigned> > zipped;
272 zipped.create (SLP_TREE_LANES (node));
273
274 for (unsigned x = 0; x < SLP_TREE_LANES (node); x+=2)
275 {
276 zipped.quick_push (std::make_pair (0, x+1));
277 zipped.quick_push (std::make_pair (0, x));
278 }
279
280 /* Create the new permute node and store it instead. */
281 slp_tree vnode = vect_create_new_slp_node (1, VEC_PERM_EXPR);
282 SLP_TREE_LANE_PERMUTATION (vnode) = zipped;
283 SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (node);
284 SLP_TREE_CHILDREN (vnode).quick_push (node);
285 SLP_TREE_REF_COUNT (vnode) = 1;
286 SLP_TREE_LANES (vnode) = SLP_TREE_LANES (node);
287 SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (node);
288 SLP_TREE_REF_COUNT (node)++;
289 return vnode;
290}
291
292/* Checks to see of the expression represented by NODE is a gimple assign with
293 code CODE. */
294
295static inline bool
296vect_match_expression_p (slp_tree node, tree_code code)
297{
298 if (!node
299 || !SLP_TREE_REPRESENTATIVE (node))
300 return false;
301
302 gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node));
303 if (!is_gimple_assign (expr)
304 || gimple_assign_rhs_code (expr) != code)
305 return false;
306
307 return true;
308}
309
310/* Check if the given lane permute in PERMUTES matches an alternating sequence
311 of {even odd even odd ...}. This to account for unrolled loops. Further
312 mode there resulting permute must be linear. */
313
314static inline bool
315vect_check_evenodd_blend (lane_permutation_t &permutes,
316 unsigned even, unsigned odd)
317{
9c68e2ab
TC
318 if (permutes.length () == 0
319 || permutes.length () % 2 != 0)
3ed472af
TC
320 return false;
321
322 unsigned val[2] = {even, odd};
323 unsigned seed = 0;
324 for (unsigned i = 0; i < permutes.length (); i++)
325 if (permutes[i].first != val[i % 2]
326 || permutes[i].second != seed++)
327 return false;
328
329 return true;
330}
331
332/* This function will match the two gimple expressions representing NODE1 and
333 NODE2 in parallel and returns the pair operation that represents the two
334 expressions in the two statements.
335
336 If match is successful then the corresponding complex_operation is
337 returned and the arguments to the two matched operations are returned in OPS.
338
339 If TWO_OPERANDS it is expected that the LANES of the parent VEC_PERM select
340 from the two nodes alternatingly.
341
342 If unsuccessful then CMPLX_NONE is returned and OPS is untouched.
343
344 e.g. the following gimple statements
345
346 stmt 0 _39 = _37 + _12;
347 stmt 1 _6 = _38 - _36;
348
349 will return PLUS_MINUS along with OPS containing {_37, _12, _38, _36}.
350*/
351
352static complex_operation_t
353vect_detect_pair_op (slp_tree node1, slp_tree node2, lane_permutation_t &lanes,
354 bool two_operands = true, vec<slp_tree> *ops = NULL)
355{
356 complex_operation_t result = CMPLX_NONE;
357
358 if (vect_match_expression_p (node1, MINUS_EXPR)
359 && vect_match_expression_p (node2, PLUS_EXPR)
360 && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
361 result = MINUS_PLUS;
362 else if (vect_match_expression_p (node1, PLUS_EXPR)
363 && vect_match_expression_p (node2, MINUS_EXPR)
364 && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
365 result = PLUS_MINUS;
366 else if (vect_match_expression_p (node1, PLUS_EXPR)
367 && vect_match_expression_p (node2, PLUS_EXPR))
368 result = PLUS_PLUS;
369 else if (vect_match_expression_p (node1, MULT_EXPR)
370 && vect_match_expression_p (node2, MULT_EXPR))
371 result = MULT_MULT;
372
373 if (result != CMPLX_NONE && ops != NULL)
374 {
ed3de62a
TC
375 if (two_operands)
376 {
377 auto l0node = SLP_TREE_CHILDREN (node1);
378 auto l1node = SLP_TREE_CHILDREN (node2);
379
380 /* Check if the tree is connected as we expect it. */
381 if (!((l0node[0] == l1node[0] && l0node[1] == l1node[1])
382 || (l0node[0] == l1node[1] && l0node[1] == l1node[0])))
383 return CMPLX_NONE;
384 }
5296bd57
TC
385 ops->safe_push (node1);
386 ops->safe_push (node2);
3ed472af
TC
387 }
388 return result;
389}
390
391/* Overload of vect_detect_pair_op that matches against the representative
392 statements in the children of NODE. It is expected that NODE has exactly
393 two children and when TWO_OPERANDS then NODE must be a VEC_PERM. */
394
395static complex_operation_t
396vect_detect_pair_op (slp_tree node, bool two_operands = true,
397 vec<slp_tree> *ops = NULL)
398{
399 if (!two_operands && SLP_TREE_CODE (node) == VEC_PERM_EXPR)
400 return CMPLX_NONE;
401
402 if (SLP_TREE_CHILDREN (node).length () != 2)
403 return CMPLX_NONE;
404
405 vec<slp_tree> children = SLP_TREE_CHILDREN (node);
406 lane_permutation_t &lanes = SLP_TREE_LANE_PERMUTATION (node);
407
408 return vect_detect_pair_op (children[0], children[1], lanes, two_operands,
409 ops);
410}
411
412/*******************************************************************************
413 * complex_pattern class
414 ******************************************************************************/
415
416/* SLP Complex Numbers pattern matching.
417
418 As an example, the following simple loop:
419
420 double a[restrict N]; double b[restrict N]; double c[restrict N];
421
422 for (int i=0; i < N; i+=2)
423 {
424 c[i] = a[i] - b[i+1];
425 c[i+1] = a[i+1] + b[i];
426 }
427
428 which represents a complex addition on with a rotation of 90* around the
429 argand plane. i.e. if `a` and `b` were complex numbers then this would be the
430 same as `a + (b * I)`.
431
432 Here the expressions for `c[i]` and `c[i+1]` are independent but have to be
433 both recognized in order for the pattern to work. As an SLP tree this is
434 represented as
435
436 +--------------------------------+
437 | stmt 0 *_9 = _10; |
438 | stmt 1 *_15 = _16; |
439 +--------------------------------+
440 |
441 |
442 v
443 +--------------------------------+
444 | stmt 0 _10 = _4 - _8; |
445 | stmt 1 _16 = _12 + _14; |
446 | lane permutation { 0[0] 1[1] } |
447 +--------------------------------+
448 | |
449 | |
450 | |
451 +-----+ | | +-----+
452 | | | | | |
453 +-----| { } |<-----+ +----->| { } --------+
454 | | | +------------------| | |
455 | +-----+ | +-----+ |
456 | | | |
457 | | | |
458 | +------|------------------+ |
459 | | | |
460 v v v v
461 +--------------------------+ +--------------------------------+
462 | stmt 0 _8 = *_7; | | stmt 0 _4 = *_3; |
463 | stmt 1 _14 = *_13; | | stmt 1 _12 = *_11; |
464 | load permutation { 1 0 } | | load permutation { 0 1 } |
465 +--------------------------+ +--------------------------------+
466
467 The pattern matcher allows you to replace both statements 0 and 1 or none at
468 all. Because this operation is a two operands operation the actual nodes
469 being replaced are those in the { } nodes. The actual scalar statements
470 themselves are not replaced or used during the matching but instead the
471 SLP_TREE_REPRESENTATIVE statements are inspected. You are also allowed to
472 replace and match on any number of nodes.
473
474 Because the pattern matcher matches on the representative statement for the
475 SLP node the case of two_operators it allows you to match the children of the
476 node. This is done using the method `recognize ()`.
477
478*/
479
480/* The complex_pattern class contains common code for pattern matchers that work
481 on complex numbers. These provide functionality to allow de-construction and
482 validation of sequences depicting/transforming REAL and IMAG pairs. */
483
484class complex_pattern : public vect_pattern
485{
486 protected:
487 auto_vec<slp_tree> m_workset;
488 complex_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
489 : vect_pattern (node, m_ops, ifn)
490 {
491 this->m_workset.safe_push (*node);
492 }
493
494 public:
4665cfbc 495 void build (vec_info *) override;
3ed472af
TC
496
497 static internal_fn
0c18faac 498 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
3ed472af
TC
499 vec<slp_tree> *);
500};
501
502/* Create a replacement pattern statement for each node in m_node and inserts
503 the new statement into m_node as the new representative statement. The old
504 statement is marked as being in a pattern defined by the new statement. The
505 statement is created as call to internal function IFN with m_num_args
506 arguments.
507
508 Futhermore the new pattern is also added to the vectorization information
509 structure VINFO and the old statement STMT_INFO is marked as unused while
510 the new statement is marked as used and the number of SLP uses of the new
511 statement is incremented.
512
513 The newly created SLP nodes are marked as SLP only and will be dissolved
514 if SLP is aborted.
515
516 The newly created gimple call is returned and the BB remains unchanged.
517
518 This default method is designed to only match against simple operands where
519 all the input and output types are the same.
520*/
521
522void
523complex_pattern::build (vec_info *vinfo)
524{
525 stmt_vec_info stmt_info;
526
527 auto_vec<tree> args;
528 args.create (this->m_num_args);
529 args.quick_grow_cleared (this->m_num_args);
530 slp_tree node;
531 unsigned ix;
532 stmt_vec_info call_stmt_info;
533 gcall *call_stmt = NULL;
534
535 /* Now modify the nodes themselves. */
536 FOR_EACH_VEC_ELT (this->m_workset, ix, node)
537 {
538 /* Calculate the location of the statement in NODE to replace. */
539 stmt_info = SLP_TREE_REPRESENTATIVE (node);
374f93da
RB
540 stmt_vec_info reduc_def
541 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info));
3ed472af
TC
542 gimple* old_stmt = STMT_VINFO_STMT (stmt_info);
543 tree lhs_old_stmt = gimple_get_lhs (old_stmt);
544 tree type = TREE_TYPE (lhs_old_stmt);
545
546 /* Create the argument set for use by gimple_build_call_internal_vec. */
547 for (unsigned i = 0; i < this->m_num_args; i++)
548 args[i] = lhs_old_stmt;
549
550 /* Create the new pattern statements. */
551 call_stmt = gimple_build_call_internal_vec (this->m_ifn, args);
552 tree var = make_temp_ssa_name (type, call_stmt, "slp_patt");
553 gimple_call_set_lhs (call_stmt, var);
554 gimple_set_location (call_stmt, gimple_location (old_stmt));
555 gimple_call_set_nothrow (call_stmt, true);
556
557 /* Adjust the book-keeping for the new and old statements for use during
558 SLP. This is required to get the right VF and statement during SLP
559 analysis. These changes are created after relevancy has been set for
560 the nodes as such we need to manually update them. Any changes will be
561 undone if SLP is cancelled. */
562 call_stmt_info
563 = vinfo->add_pattern_stmt (call_stmt, stmt_info);
564
565 /* Make sure to mark the representative statement pure_slp and
374f93da 566 relevant and transfer reduction info. */
3ed472af
TC
567 STMT_VINFO_RELEVANT (call_stmt_info) = vect_used_in_scope;
568 STMT_SLP_TYPE (call_stmt_info) = pure_slp;
374f93da 569 STMT_VINFO_REDUC_DEF (call_stmt_info) = reduc_def;
3ed472af 570
a29124d2
RB
571 gimple_set_bb (call_stmt, gimple_bb (stmt_info->stmt));
572 STMT_VINFO_VECTYPE (call_stmt_info) = SLP_TREE_VECTYPE (node);
5e606ed9 573 STMT_VINFO_SLP_VECT_ONLY_PATTERN (call_stmt_info) = true;
3ed472af
TC
574
575 /* Since we are replacing all the statements in the group with the same
576 thing it doesn't really matter. So just set it every time a new stmt
577 is created. */
578 SLP_TREE_REPRESENTATIVE (node) = call_stmt_info;
579 SLP_TREE_LANE_PERMUTATION (node).release ();
580 SLP_TREE_CODE (node) = CALL_EXPR;
581 }
582}
583
584/*******************************************************************************
585 * complex_add_pattern class
586 ******************************************************************************/
587
588class complex_add_pattern : public complex_pattern
589{
590 protected:
591 complex_add_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
592 : complex_pattern (node, m_ops, ifn)
593 {
594 this->m_num_args = 2;
595 }
596
597 public:
4665cfbc 598 void build (vec_info *) final override;
3ed472af 599 static internal_fn
55d83cdf
TC
600 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
601 slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
3ed472af
TC
602
603 static vect_pattern*
55d83cdf
TC
604 recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
605 slp_tree *);
0c18faac
TC
606
607 static vect_pattern*
608 mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
609 {
610 return new complex_add_pattern (node, m_ops, ifn);
611 }
3ed472af
TC
612};
613
614/* Perform a replacement of the detected complex add pattern with the new
615 instruction sequences. */
616
617void
618complex_add_pattern::build (vec_info *vinfo)
619{
fe701195
TC
620 SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
621
3ed472af
TC
622 slp_tree node = this->m_ops[0];
623 vec<slp_tree> children = SLP_TREE_CHILDREN (node);
624
625 /* First re-arrange the children. */
fe701195
TC
626 SLP_TREE_CHILDREN (*this->m_node)[0] = children[0];
627 SLP_TREE_CHILDREN (*this->m_node)[1] =
628 vect_build_swap_evenodd_node (children[1]);
3ed472af 629
fe701195
TC
630 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[0])++;
631 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[1])++;
0c18faac
TC
632 vect_free_slp_tree (this->m_ops[0]);
633 vect_free_slp_tree (this->m_ops[1]);
634
3ed472af
TC
635 complex_pattern::build (vinfo);
636}
637
638/* Pattern matcher for trying to match complex addition pattern in SLP tree.
639
640 If no match is found then IFN is set to IFN_LAST.
641 This function matches the patterns shaped as:
642
643 c[i] = a[i] - b[i+1];
644 c[i+1] = a[i+1] + b[i];
645
646 If a match occurred then TRUE is returned, else FALSE. The initial match is
647 expected to be in OP1 and the initial match operands in args0. */
648
649internal_fn
650complex_add_pattern::matches (complex_operation_t op,
651 slp_tree_to_load_perm_map_t *perm_cache,
55d83cdf 652 slp_compat_nodes_map_t * /* compat_cache */,
0c18faac 653 slp_tree *node, vec<slp_tree> *ops)
3ed472af
TC
654{
655 internal_fn ifn = IFN_LAST;
656
657 /* Find the two components. Rotation in the complex plane will modify
658 the operations:
659
660 * Rotation 0: + +
661 * Rotation 90: - +
662 * Rotation 180: - -
663 * Rotation 270: + -
664
665 Rotation 0 and 180 can be handled by normal SIMD code, so we don't need
666 to care about them here. */
667 if (op == MINUS_PLUS)
668 ifn = IFN_COMPLEX_ADD_ROT90;
669 else if (op == PLUS_MINUS)
670 ifn = IFN_COMPLEX_ADD_ROT270;
671 else
672 return ifn;
673
674 /* verify that there is a permute, otherwise this isn't a pattern we
675 we support. */
676 gcc_assert (ops->length () == 2);
677
678 vec<slp_tree> children = SLP_TREE_CHILDREN ((*ops)[0]);
679
680 /* First node must be unpermuted. */
c3a2bc6d 681 if (linear_loads_p (perm_cache, children[0]) != PERM_EVENODD)
3ed472af
TC
682 return IFN_LAST;
683
684 /* Second node must be permuted. */
c3a2bc6d 685 if (linear_loads_p (perm_cache, children[1]) != PERM_ODDEVEN)
3ed472af
TC
686 return IFN_LAST;
687
0c18faac
TC
688 if (!vect_pattern_validate_optab (ifn, *node))
689 return IFN_LAST;
690
3ed472af
TC
691 return ifn;
692}
693
694/* Attempt to recognize a complex add pattern. */
695
696vect_pattern*
697complex_add_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
55d83cdf 698 slp_compat_nodes_map_t *compat_cache,
3ed472af
TC
699 slp_tree *node)
700{
701 auto_vec<slp_tree> ops;
702 complex_operation_t op
703 = vect_detect_pair_op (*node, true, &ops);
0c18faac 704 internal_fn ifn
55d83cdf 705 = complex_add_pattern::matches (op, perm_cache, compat_cache, node, &ops);
0c18faac 706 if (ifn == IFN_LAST)
3ed472af
TC
707 return NULL;
708
709 return new complex_add_pattern (node, &ops, ifn);
710}
711
e09173d8
TC
712/*******************************************************************************
713 * complex_mul_pattern
714 ******************************************************************************/
715
55d83cdf 716/* Helper function to check if PERM is KIND or PERM_TOP. */
e09173d8
TC
717
718static inline bool
55d83cdf
TC
719is_eq_or_top (slp_tree_to_load_perm_map_t *perm_cache,
720 slp_tree op1, complex_perm_kinds_t kind1,
721 slp_tree op2, complex_perm_kinds_t kind2)
e09173d8 722{
55d83cdf
TC
723 complex_perm_kinds_t perm1 = linear_loads_p (perm_cache, op1);
724 if (perm1 != kind1 && perm1 != PERM_TOP)
725 return false;
e09173d8 726
55d83cdf
TC
727 complex_perm_kinds_t perm2 = linear_loads_p (perm_cache, op2);
728 if (perm2 != kind2 && perm2 != PERM_TOP)
729 return false;
e09173d8 730
55d83cdf 731 return true;
e09173d8
TC
732}
733
55d83cdf 734enum _conj_status { CONJ_NONE, CONJ_FST, CONJ_SND };
e09173d8
TC
735
736static inline bool
55d83cdf
TC
737compatible_complex_nodes_p (slp_compat_nodes_map_t *compat_cache,
738 slp_tree a, int *pa, slp_tree b, int *pb)
e09173d8 739{
55d83cdf
TC
740 bool *tmp;
741 std::pair<slp_tree, slp_tree> key = std::make_pair(a, b);
742 if ((tmp = compat_cache->get (key)) != NULL)
743 return *tmp;
e09173d8 744
55d83cdf 745 compat_cache->put (key, false);
e09173d8 746
55d83cdf
TC
747 if (SLP_TREE_CHILDREN (a).length () != SLP_TREE_CHILDREN (b).length ())
748 return false;
e09173d8 749
55d83cdf
TC
750 if (SLP_TREE_DEF_TYPE (a) != SLP_TREE_DEF_TYPE (b))
751 return false;
752
753 /* Only internal nodes can be loads, as such we can't check further if they
754 are externals. */
755 if (SLP_TREE_DEF_TYPE (a) != vect_internal_def)
e09173d8 756 {
55d83cdf
TC
757 for (unsigned i = 0; i < SLP_TREE_SCALAR_OPS (a).length (); i++)
758 {
759 tree op1 = SLP_TREE_SCALAR_OPS (a)[pa[i % 2]];
760 tree op2 = SLP_TREE_SCALAR_OPS (b)[pb[i % 2]];
761 if (!operand_equal_p (op1, op2, 0))
762 return false;
763 }
764
765 compat_cache->put (key, true);
766 return true;
e09173d8 767 }
55d83cdf
TC
768
769 auto a_stmt = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (a));
770 auto b_stmt = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (b));
771
772 if (gimple_code (a_stmt) != gimple_code (b_stmt))
773 return false;
774
775 /* code, children, type, externals, loads, constants */
776 if (gimple_num_args (a_stmt) != gimple_num_args (b_stmt))
777 return false;
778
779 /* At this point, a and b are known to be the same gimple operations. */
780 if (is_gimple_call (a_stmt))
781 {
782 if (!compatible_calls_p (dyn_cast <gcall *> (a_stmt),
783 dyn_cast <gcall *> (b_stmt)))
784 return false;
785 }
786 else if (!is_gimple_assign (a_stmt))
787 return false;
e09173d8
TC
788 else
789 {
55d83cdf
TC
790 tree_code acode = gimple_assign_rhs_code (a_stmt);
791 tree_code bcode = gimple_assign_rhs_code (b_stmt);
792 if ((acode == REALPART_EXPR || acode == IMAGPART_EXPR)
793 && (bcode == REALPART_EXPR || bcode == IMAGPART_EXPR))
794 return true;
795
796 if (acode != bcode)
e09173d8
TC
797 return false;
798 }
799
55d83cdf
TC
800 if (!SLP_TREE_LOAD_PERMUTATION (a).exists ()
801 || !SLP_TREE_LOAD_PERMUTATION (b).exists ())
e09173d8 802 {
55d83cdf
TC
803 for (unsigned i = 0; i < gimple_num_args (a_stmt); i++)
804 {
805 tree t1 = gimple_arg (a_stmt, i);
806 tree t2 = gimple_arg (b_stmt, i);
807 if (TREE_CODE (t1) != TREE_CODE (t2))
808 return false;
809
810 /* If SSA name then we will need to inspect the children
811 so we can punt here. */
812 if (TREE_CODE (t1) == SSA_NAME)
813 continue;
814
815 if (!operand_equal_p (t1, t2, 0))
816 return false;
817 }
e09173d8 818 }
55d83cdf 819 else
e09173d8 820 {
55d83cdf
TC
821 auto dr1 = STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (a));
822 auto dr2 = STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (b));
823 /* Don't check the last dimension as that's checked by the lineary
824 checks. This check is also much stricter than what we need
825 because it doesn't consider loading from adjacent elements
826 in the same struct as loading from the same base object.
827 But for now, I'll play it safe. */
828 if (!same_data_refs (dr1, dr2, 1))
e09173d8
TC
829 return false;
830 }
55d83cdf
TC
831
832 for (unsigned i = 0; i < SLP_TREE_CHILDREN (a).length (); i++)
487d604b 833 {
55d83cdf
TC
834 if (!compatible_complex_nodes_p (compat_cache,
835 SLP_TREE_CHILDREN (a)[i], pa,
836 SLP_TREE_CHILDREN (b)[i], pb))
487d604b 837 return false;
487d604b 838 }
e09173d8 839
55d83cdf 840 compat_cache->put (key, true);
e09173d8
TC
841 return true;
842}
843
e09173d8
TC
844static inline bool
845vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache,
55d83cdf
TC
846 slp_compat_nodes_map_t *compat_cache,
847 vec<slp_tree> &left_op,
848 vec<slp_tree> &right_op,
849 bool subtract,
850 enum _conj_status *_status)
e09173d8 851{
55d83cdf
TC
852 auto_vec<slp_tree> ops;
853 enum _conj_status stats = CONJ_NONE;
854
855 /* The complex operations can occur in two layouts and two permute sequences
856 so declare them and re-use them. */
857 int styles[][4] = { { 0, 2, 1, 3} /* {L1, R1} + {L2, R2}. */
858 , { 0, 3, 1, 2} /* {L1, R2} + {L2, R1}. */
859 };
860
861 /* Now for the corresponding permutes that go with these values. */
862 complex_perm_kinds_t perms[][4]
863 = { { PERM_EVENEVEN, PERM_ODDODD, PERM_EVENODD, PERM_ODDEVEN }
864 , { PERM_EVENODD, PERM_ODDEVEN, PERM_EVENEVEN, PERM_ODDODD }
865 };
866
867 /* These permutes are used during comparisons of externals on which
868 we require strict equality. */
869 int cq[][4][2]
870 = { { { 0, 0 }, { 1, 1 }, { 0, 1 }, { 1, 0 } }
871 , { { 0, 1 }, { 1, 0 }, { 0, 0 }, { 1, 1 } }
872 };
873
874 /* Default to style and perm 0, most operations use this one. */
875 int style = 0;
876 int perm = subtract ? 1 : 0;
877
878 /* Check if we have a negate operation, if so absorb the node and continue
879 looking. */
880 bool neg0 = vect_match_expression_p (right_op[0], NEGATE_EXPR);
881 bool neg1 = vect_match_expression_p (right_op[1], NEGATE_EXPR);
882
883 /* Determine which style we're looking at. We only have different ones
884 whenever a conjugate is involved. */
885 if (neg0 && neg1)
886 ;
887 else if (neg0)
e09173d8 888 {
55d83cdf
TC
889 right_op[0] = SLP_TREE_CHILDREN (right_op[0])[0];
890 stats = CONJ_FST;
891 if (subtract)
892 perm = 0;
e09173d8 893 }
55d83cdf
TC
894 else if (neg1)
895 {
896 right_op[1] = SLP_TREE_CHILDREN (right_op[1])[0];
897 stats = CONJ_SND;
898 perm = 1;
899 }
900
901 *_status = stats;
902
903 /* Flatten the inputs after we've remapped them. */
904 ops.create (4);
905 ops.safe_splice (left_op);
906 ops.safe_splice (right_op);
907
908 /* Extract out the elements to check. */
909 slp_tree op0 = ops[styles[style][0]];
910 slp_tree op1 = ops[styles[style][1]];
911 slp_tree op2 = ops[styles[style][2]];
912 slp_tree op3 = ops[styles[style][3]];
913
914 /* Do cheapest test first. If failed no need to analyze further. */
915 if (linear_loads_p (perm_cache, op0) != perms[perm][0]
916 || linear_loads_p (perm_cache, op1) != perms[perm][1]
917 || !is_eq_or_top (perm_cache, op2, perms[perm][2], op3, perms[perm][3]))
918 return false;
919
920 return compatible_complex_nodes_p (compat_cache, op0, cq[perm][0], op1,
921 cq[perm][1])
922 && compatible_complex_nodes_p (compat_cache, op2, cq[perm][2], op3,
923 cq[perm][3]);
e09173d8
TC
924}
925
926/* This function combines two nodes containing only even and only odd lanes
927 together into a single node which contains the nodes in even/odd order
928 by using a lane permute.
929
930 The lanes in EVEN and ODD are duplicated 2 times inside the vectors.
931 So for a lanes = 4 EVEN contains {EVEN1, EVEN1, EVEN2, EVEN2}.
932
933 The tree REPRESENTATION is taken from the supplied REP along with the
934 vectype which must be the same between all three nodes.
935*/
936
937static slp_tree
938vect_build_combine_node (slp_tree even, slp_tree odd, slp_tree rep)
939{
940 vec<std::pair<unsigned, unsigned> > perm;
941 perm.create (SLP_TREE_LANES (rep));
942
943 for (unsigned x = 0; x < SLP_TREE_LANES (rep); x+=2)
944 {
945 perm.quick_push (std::make_pair (0, x));
946 perm.quick_push (std::make_pair (1, x+1));
947 }
948
949 slp_tree vnode = vect_create_new_slp_node (2, SLP_TREE_CODE (even));
950 SLP_TREE_CODE (vnode) = VEC_PERM_EXPR;
951 SLP_TREE_LANE_PERMUTATION (vnode) = perm;
952
953 SLP_TREE_CHILDREN (vnode).create (2);
954 SLP_TREE_CHILDREN (vnode).quick_push (even);
955 SLP_TREE_CHILDREN (vnode).quick_push (odd);
956 SLP_TREE_REF_COUNT (even)++;
957 SLP_TREE_REF_COUNT (odd)++;
958 SLP_TREE_REF_COUNT (vnode) = 1;
959
960 SLP_TREE_LANES (vnode) = SLP_TREE_LANES (rep);
961 gcc_assert (perm.length () == SLP_TREE_LANES (vnode));
962 /* Representation is set to that of the current node as the vectorizer
963 can't deal with VEC_PERMs with no representation, as would be the
964 case with invariants. */
965 SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (rep);
966 SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (rep);
967 return vnode;
968}
969
970class complex_mul_pattern : public complex_pattern
971{
972 protected:
973 complex_mul_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
974 : complex_pattern (node, m_ops, ifn)
975 {
976 this->m_num_args = 2;
977 }
978
979 public:
4665cfbc 980 void build (vec_info *) final override;
e09173d8 981 static internal_fn
55d83cdf
TC
982 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
983 slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
e09173d8
TC
984
985 static vect_pattern*
55d83cdf
TC
986 recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
987 slp_tree *);
e09173d8
TC
988
989 static vect_pattern*
990 mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
991 {
992 return new complex_mul_pattern (node, m_ops, ifn);
993 }
994
995};
996
ed3de62a
TC
997/* Pattern matcher for trying to match complex multiply and complex multiply
998 and accumulate pattern in SLP tree. If the operation matches then IFN
999 is set to the operation it matched and the arguments to the two
1000 replacement statements are put in m_ops.
e09173d8
TC
1001
1002 If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
1003
1004 This function matches the patterns shaped as:
1005
1006 double ax = (b[i+1] * a[i]);
1007 double bx = (a[i+1] * b[i]);
1008
1009 c[i] = c[i] - ax;
1010 c[i+1] = c[i+1] + bx;
1011
1012 If a match occurred then TRUE is returned, else FALSE. The initial match is
1013 expected to be in OP1 and the initial match operands in args0. */
1014
1015internal_fn
1016complex_mul_pattern::matches (complex_operation_t op,
1017 slp_tree_to_load_perm_map_t *perm_cache,
55d83cdf 1018 slp_compat_nodes_map_t *compat_cache,
e09173d8
TC
1019 slp_tree *node, vec<slp_tree> *ops)
1020{
1021 internal_fn ifn = IFN_LAST;
1022
1023 if (op != MINUS_PLUS)
1024 return IFN_LAST;
1025
ed3de62a
TC
1026 auto childs = *ops;
1027 auto l0node = SLP_TREE_CHILDREN (childs[0]);
ed3de62a
TC
1028
1029 bool mul0 = vect_match_expression_p (l0node[0], MULT_EXPR);
1030 bool mul1 = vect_match_expression_p (l0node[1], MULT_EXPR);
487d604b 1031 if (!mul0 && !mul1)
e09173d8
TC
1032 return IFN_LAST;
1033
1034 /* Now operand2+4 may lead to another expression. */
1035 auto_vec<slp_tree> left_op, right_op;
ed3de62a
TC
1036 slp_tree add0 = NULL;
1037
1038 /* Check if we may be a multiply add. */
1039 if (!mul0
1040 && vect_match_expression_p (l0node[0], PLUS_EXPR))
1041 {
1042 auto vals = SLP_TREE_CHILDREN (l0node[0]);
1043 /* Check if it's a multiply, otherwise no idea what this is. */
487d604b 1044 if (!(mul0 = vect_match_expression_p (vals[1], MULT_EXPR)))
ed3de62a 1045 return IFN_LAST;
e09173d8 1046
ed3de62a
TC
1047 /* Check if the ADD is linear, otherwise it's not valid complex FMA. */
1048 if (linear_loads_p (perm_cache, vals[0]) != PERM_EVENODD)
1049 return IFN_LAST;
1050
1051 left_op.safe_splice (SLP_TREE_CHILDREN (vals[1]));
1052 add0 = vals[0];
1053 }
1054 else
1055 left_op.safe_splice (SLP_TREE_CHILDREN (l0node[0]));
1056
1057 right_op.safe_splice (SLP_TREE_CHILDREN (l0node[1]));
1058
1059 if (left_op.length () != 2
1060 || right_op.length () != 2
487d604b
TC
1061 || !mul0
1062 || !mul1
ed3de62a 1063 || linear_loads_p (perm_cache, left_op[1]) == PERM_ODDEVEN)
e09173d8
TC
1064 return IFN_LAST;
1065
55d83cdf
TC
1066 enum _conj_status status;
1067 if (!vect_validate_multiplication (perm_cache, compat_cache, left_op,
1068 right_op, false, &status))
1069 return IFN_LAST;
e09173d8 1070
55d83cdf 1071 if (status == CONJ_NONE)
e09173d8 1072 {
487d604b 1073 if (add0)
ed3de62a
TC
1074 ifn = IFN_COMPLEX_FMA;
1075 else
1076 ifn = IFN_COMPLEX_MUL;
e09173d8 1077 }
ed3de62a 1078 else
e09173d8 1079 {
487d604b 1080 if(add0)
ed3de62a
TC
1081 ifn = IFN_COMPLEX_FMA_CONJ;
1082 else
1083 ifn = IFN_COMPLEX_MUL_CONJ;
e09173d8
TC
1084 }
1085
1086 if (!vect_pattern_validate_optab (ifn, *node))
1087 return IFN_LAST;
1088
1089 ops->truncate (0);
ed3de62a
TC
1090 ops->create (add0 ? 4 : 3);
1091
1092 if (add0)
1093 ops->quick_push (add0);
e09173d8 1094
c3a2bc6d 1095 complex_perm_kinds_t kind = linear_loads_p (perm_cache, left_op[0]);
55d83cdf 1096 if (kind == PERM_EVENODD || kind == PERM_TOP)
e09173d8
TC
1097 {
1098 ops->quick_push (left_op[1]);
1099 ops->quick_push (right_op[1]);
1100 ops->quick_push (left_op[0]);
1101 }
55d83cdf 1102 else if (kind == PERM_EVENEVEN && status != CONJ_SND)
e09173d8
TC
1103 {
1104 ops->quick_push (left_op[0]);
1105 ops->quick_push (right_op[0]);
1106 ops->quick_push (left_op[1]);
1107 }
1108 else
1109 {
1110 ops->quick_push (left_op[0]);
1111 ops->quick_push (right_op[1]);
1112 ops->quick_push (left_op[1]);
1113 }
1114
1115 return ifn;
1116}
1117
1118/* Attempt to recognize a complex mul pattern. */
1119
1120vect_pattern*
1121complex_mul_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
55d83cdf 1122 slp_compat_nodes_map_t *compat_cache,
e09173d8
TC
1123 slp_tree *node)
1124{
1125 auto_vec<slp_tree> ops;
1126 complex_operation_t op
1127 = vect_detect_pair_op (*node, true, &ops);
1128 internal_fn ifn
55d83cdf 1129 = complex_mul_pattern::matches (op, perm_cache, compat_cache, node, &ops);
e09173d8
TC
1130 if (ifn == IFN_LAST)
1131 return NULL;
1132
1133 return new complex_mul_pattern (node, &ops, ifn);
1134}
1135
1136/* Perform a replacement of the detected complex mul pattern with the new
1137 instruction sequences. */
1138
1139void
1140complex_mul_pattern::build (vec_info *vinfo)
1141{
1142 slp_tree node;
1143 unsigned i;
ed3de62a
TC
1144 switch (this->m_ifn)
1145 {
1146 case IFN_COMPLEX_MUL:
1147 case IFN_COMPLEX_MUL_CONJ:
1148 {
1149 slp_tree newnode
1150 = vect_build_combine_node (this->m_ops[0], this->m_ops[1],
1151 *this->m_node);
1152 SLP_TREE_REF_COUNT (this->m_ops[2])++;
1153
1154 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1155 vect_free_slp_tree (node);
1156
1157 /* First re-arrange the children. */
1158 SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
1159 SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[2];
1160 SLP_TREE_CHILDREN (*this->m_node)[1] = newnode;
1161 break;
1162 }
1163 case IFN_COMPLEX_FMA:
1164 case IFN_COMPLEX_FMA_CONJ:
1165 {
1166 SLP_TREE_REF_COUNT (this->m_ops[0])++;
1167 slp_tree newnode
1168 = vect_build_combine_node (this->m_ops[1], this->m_ops[2],
1169 *this->m_node);
1170 SLP_TREE_REF_COUNT (this->m_ops[3])++;
1171
1172 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1173 vect_free_slp_tree (node);
1174
1175 /* First re-arrange the children. */
1176 SLP_TREE_CHILDREN (*this->m_node).safe_grow (3);
55d83cdf
TC
1177 SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[3];
1178 SLP_TREE_CHILDREN (*this->m_node)[1] = newnode;
1179 SLP_TREE_CHILDREN (*this->m_node)[2] = this->m_ops[0];
ed3de62a
TC
1180
1181 /* Tell the builder to expect an extra argument. */
1182 this->m_num_args++;
1183 break;
1184 }
1185 default:
1186 gcc_unreachable ();
1187 }
e09173d8
TC
1188
1189 /* And then rewrite the node itself. */
1190 complex_pattern::build (vinfo);
1191}
1192
478e571a
TC
1193/*******************************************************************************
1194 * complex_fms_pattern class
1195 ******************************************************************************/
1196
1197class complex_fms_pattern : public complex_pattern
1198{
1199 protected:
1200 complex_fms_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1201 : complex_pattern (node, m_ops, ifn)
1202 {
1203 this->m_num_args = 3;
1204 }
1205
1206 public:
4665cfbc 1207 void build (vec_info *) final override;
478e571a 1208 static internal_fn
55d83cdf
TC
1209 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
1210 slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
478e571a
TC
1211
1212 static vect_pattern*
55d83cdf
TC
1213 recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
1214 slp_tree *);
478e571a
TC
1215
1216 static vect_pattern*
1217 mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1218 {
1219 return new complex_fms_pattern (node, m_ops, ifn);
1220 }
1221};
1222
1223
ed3de62a
TC
1224/* Pattern matcher for trying to match complex multiply and subtract pattern
1225 in SLP tree. If the operation matches then IFN is set to the operation
1226 it matched and the arguments to the two replacement statements are put in
1227 m_ops.
478e571a
TC
1228
1229 If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
1230
1231 This function matches the patterns shaped as:
1232
1233 double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
1234 double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
1235
1236 c[i] = c[i] - ax;
1237 c[i+1] = c[i+1] + bx;
1238
1239 If a match occurred then TRUE is returned, else FALSE. The initial match is
1240 expected to be in OP1 and the initial match operands in args0. */
1241
1242internal_fn
1243complex_fms_pattern::matches (complex_operation_t op,
1244 slp_tree_to_load_perm_map_t *perm_cache,
55d83cdf 1245 slp_compat_nodes_map_t *compat_cache,
478e571a
TC
1246 slp_tree * ref_node, vec<slp_tree> *ops)
1247{
1248 internal_fn ifn = IFN_LAST;
1249
478e571a
TC
1250 /* We need to ignore the two_operands nodes that may also match,
1251 for that we can check if they have any scalar statements and also
1252 check that it's not a permute node as we're looking for a normal
ed3de62a
TC
1253 MINUS_EXPR operation. */
1254 if (op != CMPLX_NONE)
478e571a
TC
1255 return IFN_LAST;
1256
ed3de62a
TC
1257 slp_tree root = *ref_node;
1258 if (!vect_match_expression_p (root, MINUS_EXPR))
478e571a
TC
1259 return IFN_LAST;
1260
55d83cdf
TC
1261 /* TODO: Support invariants here, with the new layout CADD now
1262 can match before we get a chance to try CFMS. */
ed3de62a
TC
1263 auto nodes = SLP_TREE_CHILDREN (root);
1264 if (!vect_match_expression_p (nodes[1], MULT_EXPR)
1265 || vect_detect_pair_op (nodes[0]) != PLUS_MINUS)
478e571a
TC
1266 return IFN_LAST;
1267
ed3de62a
TC
1268 auto childs = SLP_TREE_CHILDREN (nodes[0]);
1269 auto l0node = SLP_TREE_CHILDREN (childs[0]);
ed3de62a 1270
478e571a
TC
1271 /* Now operand2+4 may lead to another expression. */
1272 auto_vec<slp_tree> left_op, right_op;
ed3de62a
TC
1273 left_op.safe_splice (SLP_TREE_CHILDREN (l0node[1]));
1274 right_op.safe_splice (SLP_TREE_CHILDREN (nodes[1]));
478e571a 1275
6cc8aa65
TC
1276 /* If these nodes don't have any children then they're
1277 not ones we're interested in. */
4f0a2f5a
TC
1278 if (left_op.length () != 2
1279 || right_op.length () != 2
1280 || !vect_match_expression_p (l0node[1], MULT_EXPR))
6cc8aa65
TC
1281 return IFN_LAST;
1282
55d83cdf
TC
1283 enum _conj_status status;
1284 if (!vect_validate_multiplication (perm_cache, compat_cache, right_op,
1285 left_op, true, &status))
478e571a
TC
1286 return IFN_LAST;
1287
55d83cdf 1288 if (status == CONJ_NONE)
478e571a 1289 ifn = IFN_COMPLEX_FMS;
55d83cdf 1290 else
478e571a
TC
1291 ifn = IFN_COMPLEX_FMS_CONJ;
1292
1293 if (!vect_pattern_validate_optab (ifn, *ref_node))
1294 return IFN_LAST;
1295
1296 ops->truncate (0);
1297 ops->create (4);
1298
c3a2bc6d 1299 complex_perm_kinds_t kind = linear_loads_p (perm_cache, right_op[0]);
478e571a
TC
1300 if (kind == PERM_EVENODD)
1301 {
ed3de62a 1302 ops->quick_push (l0node[0]);
478e571a
TC
1303 ops->quick_push (right_op[0]);
1304 ops->quick_push (right_op[1]);
1305 ops->quick_push (left_op[1]);
1306 }
478e571a
TC
1307 else
1308 {
ed3de62a 1309 ops->quick_push (l0node[0]);
478e571a
TC
1310 ops->quick_push (right_op[1]);
1311 ops->quick_push (right_op[0]);
55d83cdf 1312 ops->quick_push (left_op[0]);
478e571a
TC
1313 }
1314
1315 return ifn;
1316}
1317
1318/* Attempt to recognize a complex mul pattern. */
1319
1320vect_pattern*
1321complex_fms_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
55d83cdf 1322 slp_compat_nodes_map_t *compat_cache,
478e571a
TC
1323 slp_tree *node)
1324{
1325 auto_vec<slp_tree> ops;
1326 complex_operation_t op
1327 = vect_detect_pair_op (*node, true, &ops);
1328 internal_fn ifn
55d83cdf 1329 = complex_fms_pattern::matches (op, perm_cache, compat_cache, node, &ops);
478e571a
TC
1330 if (ifn == IFN_LAST)
1331 return NULL;
1332
1333 return new complex_fms_pattern (node, &ops, ifn);
1334}
1335
1336/* Perform a replacement of the detected complex mul pattern with the new
1337 instruction sequences. */
1338
1339void
1340complex_fms_pattern::build (vec_info *vinfo)
1341{
1342 slp_tree node;
1343 unsigned i;
5296bd57
TC
1344 slp_tree newnode =
1345 vect_build_combine_node (this->m_ops[2], this->m_ops[3], *this->m_node);
1346 SLP_TREE_REF_COUNT (this->m_ops[0])++;
1347 SLP_TREE_REF_COUNT (this->m_ops[1])++;
1348
478e571a
TC
1349 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1350 vect_free_slp_tree (node);
1351
1352 SLP_TREE_CHILDREN (*this->m_node).release ();
1353 SLP_TREE_CHILDREN (*this->m_node).create (3);
1354
1355 /* First re-arrange the children. */
478e571a 1356 SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[1]);
5296bd57 1357 SLP_TREE_CHILDREN (*this->m_node).quick_push (newnode);
55d83cdf 1358 SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[0]);
478e571a
TC
1359
1360 /* And then rewrite the node itself. */
1361 complex_pattern::build (vinfo);
1362}
1363
b50df1e7
TC
1364/*******************************************************************************
1365 * complex_operations_pattern class
1366 ******************************************************************************/
1367
1368/* This function combines all the existing pattern matchers above into one class
1369 that shares the functionality between them. The initial match is shared
1370 between all complex operations. */
1371
1372class complex_operations_pattern : public complex_pattern
1373{
1374 protected:
1375 complex_operations_pattern (slp_tree *node, vec<slp_tree> *m_ops,
1376 internal_fn ifn)
1377 : complex_pattern (node, m_ops, ifn)
1378 {
1379 this->m_num_args = 0;
1380 }
1381
1382 public:
4665cfbc 1383 void build (vec_info *) final override;
b50df1e7 1384 static internal_fn
55d83cdf
TC
1385 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
1386 slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
b50df1e7
TC
1387
1388 static vect_pattern*
55d83cdf
TC
1389 recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
1390 slp_tree *);
b50df1e7
TC
1391};
1392
1393/* Dummy matches implementation for proxy object. */
1394
1395internal_fn
1396complex_operations_pattern::
1397matches (complex_operation_t /* op */,
1398 slp_tree_to_load_perm_map_t * /* perm_cache */,
55d83cdf 1399 slp_compat_nodes_map_t * /* compat_cache */,
b50df1e7
TC
1400 slp_tree * /* ref_node */, vec<slp_tree> * /* ops */)
1401{
1402 return IFN_LAST;
1403}
1404
1405/* Attempt to recognize a complex mul pattern. */
1406
1407vect_pattern*
1408complex_operations_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
55d83cdf 1409 slp_compat_nodes_map_t *ccache,
b50df1e7
TC
1410 slp_tree *node)
1411{
1412 auto_vec<slp_tree> ops;
1413 complex_operation_t op
1414 = vect_detect_pair_op (*node, true, &ops);
1415 internal_fn ifn = IFN_LAST;
1416
55d83cdf 1417 ifn = complex_fms_pattern::matches (op, perm_cache, ccache, node, &ops);
b50df1e7
TC
1418 if (ifn != IFN_LAST)
1419 return complex_fms_pattern::mkInstance (node, &ops, ifn);
1420
55d83cdf 1421 ifn = complex_mul_pattern::matches (op, perm_cache, ccache, node, &ops);
b50df1e7
TC
1422 if (ifn != IFN_LAST)
1423 return complex_mul_pattern::mkInstance (node, &ops, ifn);
1424
55d83cdf 1425 ifn = complex_add_pattern::matches (op, perm_cache, ccache, node, &ops);
b50df1e7
TC
1426 if (ifn != IFN_LAST)
1427 return complex_add_pattern::mkInstance (node, &ops, ifn);
1428
1429 return NULL;
1430}
1431
1432/* Dummy implementation of build. */
1433
1434void
1435complex_operations_pattern::build (vec_info * /* vinfo */)
1436{
1437 gcc_unreachable ();
1438}
1439
7a6c31f0
RB
1440
1441/* The addsub_pattern. */
1442
1443class addsub_pattern : public vect_pattern
1444{
1445 public:
7d810646
RB
1446 addsub_pattern (slp_tree *node, internal_fn ifn)
1447 : vect_pattern (node, NULL, ifn) {};
7a6c31f0 1448
4665cfbc 1449 void build (vec_info *) final override;
7a6c31f0
RB
1450
1451 static vect_pattern*
55d83cdf
TC
1452 recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
1453 slp_tree *);
7a6c31f0
RB
1454};
1455
1456vect_pattern *
55d83cdf
TC
1457addsub_pattern::recognize (slp_tree_to_load_perm_map_t *,
1458 slp_compat_nodes_map_t *, slp_tree *node_)
7a6c31f0
RB
1459{
1460 slp_tree node = *node_;
1461 if (SLP_TREE_CODE (node) != VEC_PERM_EXPR
7d810646
RB
1462 || SLP_TREE_CHILDREN (node).length () != 2
1463 || SLP_TREE_LANE_PERMUTATION (node).length () % 2)
7a6c31f0
RB
1464 return NULL;
1465
1466 /* Match a blend of a plus and a minus op with the same number of plus and
1467 minus lanes on the same operands. */
7d810646
RB
1468 unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
1469 unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
1470 if (l0 == l1)
1471 return NULL;
1472 bool l0add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0],
1473 PLUS_EXPR);
1474 if (!l0add_p
1475 && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], MINUS_EXPR))
1476 return NULL;
1477 bool l1add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1],
1478 PLUS_EXPR);
1479 if (!l1add_p
1480 && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], MINUS_EXPR))
7a6c31f0 1481 return NULL;
7d810646
RB
1482
1483 slp_tree l0node = SLP_TREE_CHILDREN (node)[l0];
1484 slp_tree l1node = SLP_TREE_CHILDREN (node)[l1];
1485 if (!((SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[0]
1486 && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[1])
1487 || (SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[1]
1488 && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[0])))
7a6c31f0
RB
1489 return NULL;
1490
1491 for (unsigned i = 0; i < SLP_TREE_LANE_PERMUTATION (node).length (); ++i)
1492 {
1493 std::pair<unsigned, unsigned> perm = SLP_TREE_LANE_PERMUTATION (node)[i];
7d810646 1494 /* It has to be alternating -, +, -,
7a6c31f0
RB
1495 While we could permute the .ADDSUB inputs and the .ADDSUB output
1496 that's only profitable over the add + sub + blend if at least
1497 one of the permute is optimized which we can't determine here. */
7d810646 1498 if (perm.first != ((i & 1) ? l1 : l0)
7a6c31f0
RB
1499 || perm.second != i)
1500 return NULL;
1501 }
1502
7d810646
RB
1503 /* Now we have either { -, +, -, + ... } (!l0add_p) or { +, -, +, - ... }
1504 (l0add_p), see whether we have FMA variants. */
1505 if (!l0add_p
1506 && vect_match_expression_p (SLP_TREE_CHILDREN (l0node)[0], MULT_EXPR))
1507 {
1508 /* (c * d) -+ a */
1509 if (vect_pattern_validate_optab (IFN_VEC_FMADDSUB, node))
1510 return new addsub_pattern (node_, IFN_VEC_FMADDSUB);
1511 }
1512 else if (l0add_p
1513 && vect_match_expression_p (SLP_TREE_CHILDREN (l1node)[0], MULT_EXPR))
1514 {
1515 /* (c * d) +- a */
1516 if (vect_pattern_validate_optab (IFN_VEC_FMSUBADD, node))
1517 return new addsub_pattern (node_, IFN_VEC_FMSUBADD);
1518 }
7a6c31f0 1519
7d810646
RB
1520 if (!l0add_p && vect_pattern_validate_optab (IFN_VEC_ADDSUB, node))
1521 return new addsub_pattern (node_, IFN_VEC_ADDSUB);
1522
1523 return NULL;
7a6c31f0
RB
1524}
1525
1526void
1527addsub_pattern::build (vec_info *vinfo)
1528{
1529 slp_tree node = *m_node;
1530
7d810646
RB
1531 unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
1532 unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
1533
1534 switch (m_ifn)
1535 {
1536 case IFN_VEC_ADDSUB:
1537 {
1538 slp_tree sub = SLP_TREE_CHILDREN (node)[l0];
1539 slp_tree add = SLP_TREE_CHILDREN (node)[l1];
1540
1541 /* Modify the blend node in-place. */
1542 SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (sub)[0];
1543 SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (sub)[1];
1544 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
1545 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
1546
1547 /* Build IFN_VEC_ADDSUB from the sub representative operands. */
1548 stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (sub);
1549 gcall *call = gimple_build_call_internal (IFN_VEC_ADDSUB, 2,
1550 gimple_assign_rhs1 (rep->stmt),
1551 gimple_assign_rhs2 (rep->stmt));
1552 gimple_call_set_lhs (call, make_ssa_name
1553 (TREE_TYPE (gimple_assign_lhs (rep->stmt))));
1554 gimple_call_set_nothrow (call, true);
1555 gimple_set_bb (call, gimple_bb (rep->stmt));
1556 stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, rep);
1557 SLP_TREE_REPRESENTATIVE (node) = new_rep;
1558 STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
1559 STMT_SLP_TYPE (new_rep) = pure_slp;
1560 STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
1561 STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
1562 STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (rep));
1563 SLP_TREE_CODE (node) = ERROR_MARK;
1564 SLP_TREE_LANE_PERMUTATION (node).release ();
1565
1566 vect_free_slp_tree (sub);
1567 vect_free_slp_tree (add);
1568 break;
1569 }
1570 case IFN_VEC_FMADDSUB:
1571 case IFN_VEC_FMSUBADD:
1572 {
1573 slp_tree sub, add;
1574 if (m_ifn == IFN_VEC_FMADDSUB)
1575 {
1576 sub = SLP_TREE_CHILDREN (node)[l0];
1577 add = SLP_TREE_CHILDREN (node)[l1];
1578 }
1579 else /* m_ifn == IFN_VEC_FMSUBADD */
1580 {
1581 sub = SLP_TREE_CHILDREN (node)[l1];
1582 add = SLP_TREE_CHILDREN (node)[l0];
1583 }
1584 slp_tree mul = SLP_TREE_CHILDREN (sub)[0];
1585 /* Modify the blend node in-place. */
1586 SLP_TREE_CHILDREN (node).safe_grow (3, true);
1587 SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (mul)[0];
1588 SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (mul)[1];
1589 SLP_TREE_CHILDREN (node)[2] = SLP_TREE_CHILDREN (sub)[1];
1590 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
1591 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
1592 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[2])++;
1593
1594 /* Build IFN_VEC_FMADDSUB from the mul/sub representative operands. */
1595 stmt_vec_info srep = SLP_TREE_REPRESENTATIVE (sub);
1596 stmt_vec_info mrep = SLP_TREE_REPRESENTATIVE (mul);
1597 gcall *call = gimple_build_call_internal (m_ifn, 3,
1598 gimple_assign_rhs1 (mrep->stmt),
1599 gimple_assign_rhs2 (mrep->stmt),
1600 gimple_assign_rhs2 (srep->stmt));
1601 gimple_call_set_lhs (call, make_ssa_name
1602 (TREE_TYPE (gimple_assign_lhs (srep->stmt))));
1603 gimple_call_set_nothrow (call, true);
1604 gimple_set_bb (call, gimple_bb (srep->stmt));
1605 stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, srep);
1606 SLP_TREE_REPRESENTATIVE (node) = new_rep;
1607 STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
1608 STMT_SLP_TYPE (new_rep) = pure_slp;
1609 STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
1610 STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
1611 STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (srep));
1612 SLP_TREE_CODE (node) = ERROR_MARK;
1613 SLP_TREE_LANE_PERMUTATION (node).release ();
1614
1615 vect_free_slp_tree (sub);
1616 vect_free_slp_tree (add);
1617 break;
1618 }
1619 default:;
1620 }
7a6c31f0
RB
1621}
1622
3ed472af
TC
1623/*******************************************************************************
1624 * Pattern matching definitions
1625 ******************************************************************************/
1626
1627#define SLP_PATTERN(x) &x::recognize
1628vect_pattern_decl_t slp_patterns[]
1629{
1630 /* For least amount of back-tracking and more efficient matching
1631 order patterns from the largest to the smallest. Especially if they
1632 overlap in what they can detect. */
1633
b50df1e7 1634 SLP_PATTERN (complex_operations_pattern),
7a6c31f0 1635 SLP_PATTERN (addsub_pattern)
3ed472af
TC
1636};
1637#undef SLP_PATTERN
1638
1639/* Set the number of SLP pattern matchers available. */
ca32b29e 1640size_t num__slp_patterns = ARRAY_SIZE (slp_patterns);