]>
Commit | Line | Data |
---|---|---|
3ed472af | 1 | /* SLP - Pattern matcher on SLP trees |
7adcbafe | 2 | Copyright (C) 2020-2022 Free Software Foundation, Inc. |
3ed472af TC |
3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it under | |
7 | the terms of the GNU General Public License as published by the Free | |
8 | Software Foundation; either version 3, or (at your option) any later | |
9 | version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING3. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #include "config.h" | |
21 | #include "system.h" | |
22 | #include "coretypes.h" | |
23 | #include "backend.h" | |
24 | #include "target.h" | |
25 | #include "rtl.h" | |
26 | #include "tree.h" | |
27 | #include "gimple.h" | |
28 | #include "tree-pass.h" | |
29 | #include "ssa.h" | |
30 | #include "optabs-tree.h" | |
31 | #include "insn-config.h" | |
32 | #include "recog.h" /* FIXME: for insn_data */ | |
33 | #include "fold-const.h" | |
34 | #include "stor-layout.h" | |
35 | #include "gimple-iterator.h" | |
36 | #include "cfgloop.h" | |
37 | #include "tree-vectorizer.h" | |
38 | #include "langhooks.h" | |
39 | #include "gimple-walk.h" | |
40 | #include "dbgcnt.h" | |
41 | #include "tree-vector-builder.h" | |
42 | #include "vec-perm-indices.h" | |
43 | #include "gimple-fold.h" | |
44 | #include "internal-fn.h" | |
45 | ||
46 | /* SLP Pattern matching mechanism. | |
47 | ||
48 | This extension to the SLP vectorizer allows one to transform the generated SLP | |
49 | tree based on any pattern. The difference between this and the normal vect | |
50 | pattern matcher is that unlike the former, this matcher allows you to match | |
51 | with instructions that do not belong to the same SSA dominator graph. | |
52 | ||
53 | The only requirement that this pattern matcher has is that you are only | |
54 | only allowed to either match an entire group or none. | |
55 | ||
56 | The pattern matcher currently only allows you to perform replacements to | |
57 | internal functions. | |
58 | ||
59 | Once the patterns are matched it is one way, these cannot be undone. It is | |
60 | currently not supported to match patterns recursively. | |
61 | ||
62 | To add a new pattern, implement the vect_pattern class and add the type to | |
63 | slp_patterns. | |
64 | ||
65 | */ | |
66 | ||
67 | /******************************************************************************* | |
68 | * vect_pattern class | |
69 | ******************************************************************************/ | |
70 | ||
71 | /* Default implementation of recognize that performs matching, validation and | |
72 | replacement of nodes but that can be overriden if required. */ | |
73 | ||
74 | static bool | |
75 | vect_pattern_validate_optab (internal_fn ifn, slp_tree node) | |
76 | { | |
77 | tree vectype = SLP_TREE_VECTYPE (node); | |
78 | if (ifn == IFN_LAST || !vectype) | |
79 | return false; | |
80 | ||
81 | if (dump_enabled_p ()) | |
82 | dump_printf_loc (MSG_NOTE, vect_location, | |
83 | "Found %s pattern in SLP tree\n", | |
84 | internal_fn_name (ifn)); | |
85 | ||
86 | if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED)) | |
87 | { | |
88 | if (dump_enabled_p ()) | |
89 | dump_printf_loc (MSG_NOTE, vect_location, | |
90 | "Target supports %s vectorization with mode %T\n", | |
91 | internal_fn_name (ifn), vectype); | |
92 | } | |
93 | else | |
94 | { | |
95 | if (dump_enabled_p ()) | |
96 | { | |
97 | if (!vectype) | |
98 | dump_printf_loc (MSG_NOTE, vect_location, | |
953e08fd JJ |
99 | "Target does not support vector type for %G\n", |
100 | STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node))); | |
3ed472af TC |
101 | else |
102 | dump_printf_loc (MSG_NOTE, vect_location, | |
103 | "Target does not support %s for vector type " | |
104 | "%T\n", internal_fn_name (ifn), vectype); | |
105 | } | |
106 | return false; | |
107 | } | |
108 | return true; | |
109 | } | |
110 | ||
111 | /******************************************************************************* | |
112 | * General helper types | |
113 | ******************************************************************************/ | |
114 | ||
115 | /* The COMPLEX_OPERATION enum denotes the possible pair of operations that can | |
116 | be matched when looking for expressions that we are interested matching for | |
117 | complex numbers addition and mla. */ | |
118 | ||
119 | typedef enum _complex_operation : unsigned { | |
120 | PLUS_PLUS, | |
121 | MINUS_PLUS, | |
122 | PLUS_MINUS, | |
123 | MULT_MULT, | |
124 | CMPLX_NONE | |
125 | } complex_operation_t; | |
126 | ||
127 | /******************************************************************************* | |
128 | * General helper functions | |
129 | ******************************************************************************/ | |
130 | ||
131 | /* Helper function of linear_loads_p that checks to see if the load permutation | |
132 | is sequential and in monotonically increasing order of loads with no gaps. | |
133 | */ | |
134 | ||
135 | static inline complex_perm_kinds_t | |
136 | is_linear_load_p (load_permutation_t loads) | |
137 | { | |
138 | if (loads.length() == 0) | |
139 | return PERM_UNKNOWN; | |
140 | ||
141 | unsigned load, i; | |
142 | complex_perm_kinds_t candidates[4] | |
39666d2b | 143 | = { PERM_ODDODD |
3ed472af | 144 | , PERM_EVENEVEN |
39666d2b TC |
145 | , PERM_EVENODD |
146 | , PERM_ODDEVEN | |
3ed472af TC |
147 | }; |
148 | ||
149 | int valid_patterns = 4; | |
39666d2b | 150 | FOR_EACH_VEC_ELT (loads, i, load) |
3ed472af | 151 | { |
55d83cdf TC |
152 | unsigned adj_load = load % 2; |
153 | if (candidates[0] != PERM_UNKNOWN && adj_load != 1) | |
3ed472af TC |
154 | { |
155 | candidates[0] = PERM_UNKNOWN; | |
156 | valid_patterns--; | |
157 | } | |
55d83cdf | 158 | if (candidates[1] != PERM_UNKNOWN && adj_load != 0) |
3ed472af TC |
159 | { |
160 | candidates[1] = PERM_UNKNOWN; | |
161 | valid_patterns--; | |
162 | } | |
39666d2b | 163 | if (candidates[2] != PERM_UNKNOWN && load != i) |
3ed472af TC |
164 | { |
165 | candidates[2] = PERM_UNKNOWN; | |
166 | valid_patterns--; | |
167 | } | |
39666d2b TC |
168 | if (candidates[3] != PERM_UNKNOWN |
169 | && load != (i % 2 == 0 ? i + 1 : i - 1)) | |
3ed472af TC |
170 | { |
171 | candidates[3] = PERM_UNKNOWN; | |
172 | valid_patterns--; | |
173 | } | |
174 | ||
175 | if (valid_patterns == 0) | |
176 | return PERM_UNKNOWN; | |
177 | } | |
178 | ||
179 | for (i = 0; i < sizeof(candidates); i++) | |
180 | if (candidates[i] != PERM_UNKNOWN) | |
181 | return candidates[i]; | |
182 | ||
183 | return PERM_UNKNOWN; | |
184 | } | |
185 | ||
186 | /* Combine complex_perm_kinds A and B into a new permute kind that describes the | |
187 | resulting operation. */ | |
188 | ||
189 | static inline complex_perm_kinds_t | |
190 | vect_merge_perms (complex_perm_kinds_t a, complex_perm_kinds_t b) | |
191 | { | |
192 | if (a == b) | |
193 | return a; | |
194 | ||
195 | if (a == PERM_TOP) | |
196 | return b; | |
197 | ||
198 | if (b == PERM_TOP) | |
199 | return a; | |
200 | ||
201 | return PERM_UNKNOWN; | |
202 | } | |
203 | ||
204 | /* Check to see if all loads rooted in ROOT are linear. Linearity is | |
205 | defined as having no gaps between values loaded. */ | |
206 | ||
c3a2bc6d | 207 | static complex_perm_kinds_t |
3ed472af TC |
208 | linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, slp_tree root) |
209 | { | |
210 | if (!root) | |
c3a2bc6d | 211 | return PERM_UNKNOWN; |
3ed472af TC |
212 | |
213 | unsigned i; | |
c3a2bc6d | 214 | complex_perm_kinds_t *tmp; |
3ed472af TC |
215 | |
216 | if ((tmp = perm_cache->get (root)) != NULL) | |
217 | return *tmp; | |
218 | ||
c3a2bc6d | 219 | complex_perm_kinds_t retval = PERM_UNKNOWN; |
3ed472af TC |
220 | perm_cache->put (root, retval); |
221 | ||
222 | /* If it's a load node, then just read the load permute. */ | |
223 | if (SLP_TREE_LOAD_PERMUTATION (root).exists ()) | |
224 | { | |
c3a2bc6d | 225 | retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root)); |
3ed472af TC |
226 | perm_cache->put (root, retval); |
227 | return retval; | |
228 | } | |
229 | else if (SLP_TREE_DEF_TYPE (root) != vect_internal_def) | |
230 | { | |
c3a2bc6d | 231 | retval = PERM_TOP; |
bd4298e1 | 232 | perm_cache->put (root, retval); |
3ed472af TC |
233 | return retval; |
234 | } | |
235 | ||
3ed472af TC |
236 | complex_perm_kinds_t kind = PERM_TOP; |
237 | ||
238 | slp_tree child; | |
239 | FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, child) | |
240 | { | |
c3a2bc6d TC |
241 | complex_perm_kinds_t res = linear_loads_p (perm_cache, child); |
242 | kind = vect_merge_perms (kind, res); | |
159b0bd9 | 243 | /* Unknown and Top are not valid on blends as they produce no permute. */ |
c3a2bc6d | 244 | retval = kind; |
159b0bd9 | 245 | if (kind == PERM_UNKNOWN || kind == PERM_TOP) |
3ed472af | 246 | return retval; |
3ed472af TC |
247 | } |
248 | ||
c3a2bc6d | 249 | retval = kind; |
3ed472af TC |
250 | |
251 | perm_cache->put (root, retval); | |
252 | return retval; | |
253 | } | |
254 | ||
255 | ||
256 | /* This function attempts to make a node rooted in NODE is linear. If the node | |
257 | if already linear than the node itself is returned in RESULT. | |
258 | ||
259 | If the node is not linear then a new VEC_PERM_EXPR node is created with a | |
260 | lane permute that when applied will make the node linear. If such a | |
261 | permute cannot be created then FALSE is returned from the function. | |
262 | ||
263 | Here linearity is defined as having a sequential, monotically increasing | |
264 | load position inside the load permute generated by the loads reachable from | |
265 | NODE. */ | |
266 | ||
267 | static slp_tree | |
268 | vect_build_swap_evenodd_node (slp_tree node) | |
269 | { | |
270 | /* Attempt to linearise the permute. */ | |
271 | vec<std::pair<unsigned, unsigned> > zipped; | |
272 | zipped.create (SLP_TREE_LANES (node)); | |
273 | ||
274 | for (unsigned x = 0; x < SLP_TREE_LANES (node); x+=2) | |
275 | { | |
276 | zipped.quick_push (std::make_pair (0, x+1)); | |
277 | zipped.quick_push (std::make_pair (0, x)); | |
278 | } | |
279 | ||
280 | /* Create the new permute node and store it instead. */ | |
281 | slp_tree vnode = vect_create_new_slp_node (1, VEC_PERM_EXPR); | |
282 | SLP_TREE_LANE_PERMUTATION (vnode) = zipped; | |
283 | SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (node); | |
284 | SLP_TREE_CHILDREN (vnode).quick_push (node); | |
285 | SLP_TREE_REF_COUNT (vnode) = 1; | |
286 | SLP_TREE_LANES (vnode) = SLP_TREE_LANES (node); | |
287 | SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (node); | |
288 | SLP_TREE_REF_COUNT (node)++; | |
289 | return vnode; | |
290 | } | |
291 | ||
292 | /* Checks to see of the expression represented by NODE is a gimple assign with | |
293 | code CODE. */ | |
294 | ||
295 | static inline bool | |
296 | vect_match_expression_p (slp_tree node, tree_code code) | |
297 | { | |
298 | if (!node | |
299 | || !SLP_TREE_REPRESENTATIVE (node)) | |
300 | return false; | |
301 | ||
302 | gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node)); | |
303 | if (!is_gimple_assign (expr) | |
304 | || gimple_assign_rhs_code (expr) != code) | |
305 | return false; | |
306 | ||
307 | return true; | |
308 | } | |
309 | ||
310 | /* Check if the given lane permute in PERMUTES matches an alternating sequence | |
311 | of {even odd even odd ...}. This to account for unrolled loops. Further | |
312 | mode there resulting permute must be linear. */ | |
313 | ||
314 | static inline bool | |
315 | vect_check_evenodd_blend (lane_permutation_t &permutes, | |
316 | unsigned even, unsigned odd) | |
317 | { | |
9c68e2ab TC |
318 | if (permutes.length () == 0 |
319 | || permutes.length () % 2 != 0) | |
3ed472af TC |
320 | return false; |
321 | ||
322 | unsigned val[2] = {even, odd}; | |
323 | unsigned seed = 0; | |
324 | for (unsigned i = 0; i < permutes.length (); i++) | |
325 | if (permutes[i].first != val[i % 2] | |
326 | || permutes[i].second != seed++) | |
327 | return false; | |
328 | ||
329 | return true; | |
330 | } | |
331 | ||
332 | /* This function will match the two gimple expressions representing NODE1 and | |
333 | NODE2 in parallel and returns the pair operation that represents the two | |
334 | expressions in the two statements. | |
335 | ||
336 | If match is successful then the corresponding complex_operation is | |
337 | returned and the arguments to the two matched operations are returned in OPS. | |
338 | ||
339 | If TWO_OPERANDS it is expected that the LANES of the parent VEC_PERM select | |
340 | from the two nodes alternatingly. | |
341 | ||
342 | If unsuccessful then CMPLX_NONE is returned and OPS is untouched. | |
343 | ||
344 | e.g. the following gimple statements | |
345 | ||
346 | stmt 0 _39 = _37 + _12; | |
347 | stmt 1 _6 = _38 - _36; | |
348 | ||
349 | will return PLUS_MINUS along with OPS containing {_37, _12, _38, _36}. | |
350 | */ | |
351 | ||
352 | static complex_operation_t | |
353 | vect_detect_pair_op (slp_tree node1, slp_tree node2, lane_permutation_t &lanes, | |
354 | bool two_operands = true, vec<slp_tree> *ops = NULL) | |
355 | { | |
356 | complex_operation_t result = CMPLX_NONE; | |
357 | ||
358 | if (vect_match_expression_p (node1, MINUS_EXPR) | |
359 | && vect_match_expression_p (node2, PLUS_EXPR) | |
360 | && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1))) | |
361 | result = MINUS_PLUS; | |
362 | else if (vect_match_expression_p (node1, PLUS_EXPR) | |
363 | && vect_match_expression_p (node2, MINUS_EXPR) | |
364 | && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1))) | |
365 | result = PLUS_MINUS; | |
366 | else if (vect_match_expression_p (node1, PLUS_EXPR) | |
367 | && vect_match_expression_p (node2, PLUS_EXPR)) | |
368 | result = PLUS_PLUS; | |
369 | else if (vect_match_expression_p (node1, MULT_EXPR) | |
370 | && vect_match_expression_p (node2, MULT_EXPR)) | |
371 | result = MULT_MULT; | |
372 | ||
373 | if (result != CMPLX_NONE && ops != NULL) | |
374 | { | |
ed3de62a TC |
375 | if (two_operands) |
376 | { | |
377 | auto l0node = SLP_TREE_CHILDREN (node1); | |
378 | auto l1node = SLP_TREE_CHILDREN (node2); | |
379 | ||
380 | /* Check if the tree is connected as we expect it. */ | |
381 | if (!((l0node[0] == l1node[0] && l0node[1] == l1node[1]) | |
382 | || (l0node[0] == l1node[1] && l0node[1] == l1node[0]))) | |
383 | return CMPLX_NONE; | |
384 | } | |
5296bd57 TC |
385 | ops->safe_push (node1); |
386 | ops->safe_push (node2); | |
3ed472af TC |
387 | } |
388 | return result; | |
389 | } | |
390 | ||
391 | /* Overload of vect_detect_pair_op that matches against the representative | |
392 | statements in the children of NODE. It is expected that NODE has exactly | |
393 | two children and when TWO_OPERANDS then NODE must be a VEC_PERM. */ | |
394 | ||
395 | static complex_operation_t | |
396 | vect_detect_pair_op (slp_tree node, bool two_operands = true, | |
397 | vec<slp_tree> *ops = NULL) | |
398 | { | |
399 | if (!two_operands && SLP_TREE_CODE (node) == VEC_PERM_EXPR) | |
400 | return CMPLX_NONE; | |
401 | ||
402 | if (SLP_TREE_CHILDREN (node).length () != 2) | |
403 | return CMPLX_NONE; | |
404 | ||
405 | vec<slp_tree> children = SLP_TREE_CHILDREN (node); | |
406 | lane_permutation_t &lanes = SLP_TREE_LANE_PERMUTATION (node); | |
407 | ||
408 | return vect_detect_pair_op (children[0], children[1], lanes, two_operands, | |
409 | ops); | |
410 | } | |
411 | ||
412 | /******************************************************************************* | |
413 | * complex_pattern class | |
414 | ******************************************************************************/ | |
415 | ||
416 | /* SLP Complex Numbers pattern matching. | |
417 | ||
418 | As an example, the following simple loop: | |
419 | ||
420 | double a[restrict N]; double b[restrict N]; double c[restrict N]; | |
421 | ||
422 | for (int i=0; i < N; i+=2) | |
423 | { | |
424 | c[i] = a[i] - b[i+1]; | |
425 | c[i+1] = a[i+1] + b[i]; | |
426 | } | |
427 | ||
428 | which represents a complex addition on with a rotation of 90* around the | |
429 | argand plane. i.e. if `a` and `b` were complex numbers then this would be the | |
430 | same as `a + (b * I)`. | |
431 | ||
432 | Here the expressions for `c[i]` and `c[i+1]` are independent but have to be | |
433 | both recognized in order for the pattern to work. As an SLP tree this is | |
434 | represented as | |
435 | ||
436 | +--------------------------------+ | |
437 | | stmt 0 *_9 = _10; | | |
438 | | stmt 1 *_15 = _16; | | |
439 | +--------------------------------+ | |
440 | | | |
441 | | | |
442 | v | |
443 | +--------------------------------+ | |
444 | | stmt 0 _10 = _4 - _8; | | |
445 | | stmt 1 _16 = _12 + _14; | | |
446 | | lane permutation { 0[0] 1[1] } | | |
447 | +--------------------------------+ | |
448 | | | | |
449 | | | | |
450 | | | | |
451 | +-----+ | | +-----+ | |
452 | | | | | | | | |
453 | +-----| { } |<-----+ +----->| { } --------+ | |
454 | | | | +------------------| | | | |
455 | | +-----+ | +-----+ | | |
456 | | | | | | |
457 | | | | | | |
458 | | +------|------------------+ | | |
459 | | | | | | |
460 | v v v v | |
461 | +--------------------------+ +--------------------------------+ | |
462 | | stmt 0 _8 = *_7; | | stmt 0 _4 = *_3; | | |
463 | | stmt 1 _14 = *_13; | | stmt 1 _12 = *_11; | | |
464 | | load permutation { 1 0 } | | load permutation { 0 1 } | | |
465 | +--------------------------+ +--------------------------------+ | |
466 | ||
467 | The pattern matcher allows you to replace both statements 0 and 1 or none at | |
468 | all. Because this operation is a two operands operation the actual nodes | |
469 | being replaced are those in the { } nodes. The actual scalar statements | |
470 | themselves are not replaced or used during the matching but instead the | |
471 | SLP_TREE_REPRESENTATIVE statements are inspected. You are also allowed to | |
472 | replace and match on any number of nodes. | |
473 | ||
474 | Because the pattern matcher matches on the representative statement for the | |
475 | SLP node the case of two_operators it allows you to match the children of the | |
476 | node. This is done using the method `recognize ()`. | |
477 | ||
478 | */ | |
479 | ||
480 | /* The complex_pattern class contains common code for pattern matchers that work | |
481 | on complex numbers. These provide functionality to allow de-construction and | |
482 | validation of sequences depicting/transforming REAL and IMAG pairs. */ | |
483 | ||
484 | class complex_pattern : public vect_pattern | |
485 | { | |
486 | protected: | |
487 | auto_vec<slp_tree> m_workset; | |
488 | complex_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn) | |
489 | : vect_pattern (node, m_ops, ifn) | |
490 | { | |
491 | this->m_workset.safe_push (*node); | |
492 | } | |
493 | ||
494 | public: | |
4665cfbc | 495 | void build (vec_info *) override; |
3ed472af TC |
496 | |
497 | static internal_fn | |
0c18faac | 498 | matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *, |
3ed472af TC |
499 | vec<slp_tree> *); |
500 | }; | |
501 | ||
502 | /* Create a replacement pattern statement for each node in m_node and inserts | |
503 | the new statement into m_node as the new representative statement. The old | |
504 | statement is marked as being in a pattern defined by the new statement. The | |
505 | statement is created as call to internal function IFN with m_num_args | |
506 | arguments. | |
507 | ||
508 | Futhermore the new pattern is also added to the vectorization information | |
509 | structure VINFO and the old statement STMT_INFO is marked as unused while | |
510 | the new statement is marked as used and the number of SLP uses of the new | |
511 | statement is incremented. | |
512 | ||
513 | The newly created SLP nodes are marked as SLP only and will be dissolved | |
514 | if SLP is aborted. | |
515 | ||
516 | The newly created gimple call is returned and the BB remains unchanged. | |
517 | ||
518 | This default method is designed to only match against simple operands where | |
519 | all the input and output types are the same. | |
520 | */ | |
521 | ||
522 | void | |
523 | complex_pattern::build (vec_info *vinfo) | |
524 | { | |
525 | stmt_vec_info stmt_info; | |
526 | ||
527 | auto_vec<tree> args; | |
528 | args.create (this->m_num_args); | |
529 | args.quick_grow_cleared (this->m_num_args); | |
530 | slp_tree node; | |
531 | unsigned ix; | |
532 | stmt_vec_info call_stmt_info; | |
533 | gcall *call_stmt = NULL; | |
534 | ||
535 | /* Now modify the nodes themselves. */ | |
536 | FOR_EACH_VEC_ELT (this->m_workset, ix, node) | |
537 | { | |
538 | /* Calculate the location of the statement in NODE to replace. */ | |
539 | stmt_info = SLP_TREE_REPRESENTATIVE (node); | |
374f93da RB |
540 | stmt_vec_info reduc_def |
541 | = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)); | |
3ed472af TC |
542 | gimple* old_stmt = STMT_VINFO_STMT (stmt_info); |
543 | tree lhs_old_stmt = gimple_get_lhs (old_stmt); | |
544 | tree type = TREE_TYPE (lhs_old_stmt); | |
545 | ||
546 | /* Create the argument set for use by gimple_build_call_internal_vec. */ | |
547 | for (unsigned i = 0; i < this->m_num_args; i++) | |
548 | args[i] = lhs_old_stmt; | |
549 | ||
550 | /* Create the new pattern statements. */ | |
551 | call_stmt = gimple_build_call_internal_vec (this->m_ifn, args); | |
552 | tree var = make_temp_ssa_name (type, call_stmt, "slp_patt"); | |
553 | gimple_call_set_lhs (call_stmt, var); | |
554 | gimple_set_location (call_stmt, gimple_location (old_stmt)); | |
555 | gimple_call_set_nothrow (call_stmt, true); | |
556 | ||
557 | /* Adjust the book-keeping for the new and old statements for use during | |
558 | SLP. This is required to get the right VF and statement during SLP | |
559 | analysis. These changes are created after relevancy has been set for | |
560 | the nodes as such we need to manually update them. Any changes will be | |
561 | undone if SLP is cancelled. */ | |
562 | call_stmt_info | |
563 | = vinfo->add_pattern_stmt (call_stmt, stmt_info); | |
564 | ||
565 | /* Make sure to mark the representative statement pure_slp and | |
374f93da | 566 | relevant and transfer reduction info. */ |
3ed472af TC |
567 | STMT_VINFO_RELEVANT (call_stmt_info) = vect_used_in_scope; |
568 | STMT_SLP_TYPE (call_stmt_info) = pure_slp; | |
374f93da | 569 | STMT_VINFO_REDUC_DEF (call_stmt_info) = reduc_def; |
3ed472af | 570 | |
a29124d2 RB |
571 | gimple_set_bb (call_stmt, gimple_bb (stmt_info->stmt)); |
572 | STMT_VINFO_VECTYPE (call_stmt_info) = SLP_TREE_VECTYPE (node); | |
5e606ed9 | 573 | STMT_VINFO_SLP_VECT_ONLY_PATTERN (call_stmt_info) = true; |
3ed472af TC |
574 | |
575 | /* Since we are replacing all the statements in the group with the same | |
576 | thing it doesn't really matter. So just set it every time a new stmt | |
577 | is created. */ | |
578 | SLP_TREE_REPRESENTATIVE (node) = call_stmt_info; | |
579 | SLP_TREE_LANE_PERMUTATION (node).release (); | |
580 | SLP_TREE_CODE (node) = CALL_EXPR; | |
581 | } | |
582 | } | |
583 | ||
584 | /******************************************************************************* | |
585 | * complex_add_pattern class | |
586 | ******************************************************************************/ | |
587 | ||
588 | class complex_add_pattern : public complex_pattern | |
589 | { | |
590 | protected: | |
591 | complex_add_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn) | |
592 | : complex_pattern (node, m_ops, ifn) | |
593 | { | |
594 | this->m_num_args = 2; | |
595 | } | |
596 | ||
597 | public: | |
4665cfbc | 598 | void build (vec_info *) final override; |
3ed472af | 599 | static internal_fn |
55d83cdf TC |
600 | matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, |
601 | slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *); | |
3ed472af TC |
602 | |
603 | static vect_pattern* | |
55d83cdf TC |
604 | recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *, |
605 | slp_tree *); | |
0c18faac TC |
606 | |
607 | static vect_pattern* | |
608 | mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn) | |
609 | { | |
610 | return new complex_add_pattern (node, m_ops, ifn); | |
611 | } | |
3ed472af TC |
612 | }; |
613 | ||
614 | /* Perform a replacement of the detected complex add pattern with the new | |
615 | instruction sequences. */ | |
616 | ||
617 | void | |
618 | complex_add_pattern::build (vec_info *vinfo) | |
619 | { | |
fe701195 TC |
620 | SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2); |
621 | ||
3ed472af TC |
622 | slp_tree node = this->m_ops[0]; |
623 | vec<slp_tree> children = SLP_TREE_CHILDREN (node); | |
624 | ||
625 | /* First re-arrange the children. */ | |
fe701195 TC |
626 | SLP_TREE_CHILDREN (*this->m_node)[0] = children[0]; |
627 | SLP_TREE_CHILDREN (*this->m_node)[1] = | |
628 | vect_build_swap_evenodd_node (children[1]); | |
3ed472af | 629 | |
fe701195 TC |
630 | SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[0])++; |
631 | SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[1])++; | |
0c18faac TC |
632 | vect_free_slp_tree (this->m_ops[0]); |
633 | vect_free_slp_tree (this->m_ops[1]); | |
634 | ||
3ed472af TC |
635 | complex_pattern::build (vinfo); |
636 | } | |
637 | ||
638 | /* Pattern matcher for trying to match complex addition pattern in SLP tree. | |
639 | ||
640 | If no match is found then IFN is set to IFN_LAST. | |
641 | This function matches the patterns shaped as: | |
642 | ||
643 | c[i] = a[i] - b[i+1]; | |
644 | c[i+1] = a[i+1] + b[i]; | |
645 | ||
646 | If a match occurred then TRUE is returned, else FALSE. The initial match is | |
647 | expected to be in OP1 and the initial match operands in args0. */ | |
648 | ||
649 | internal_fn | |
650 | complex_add_pattern::matches (complex_operation_t op, | |
651 | slp_tree_to_load_perm_map_t *perm_cache, | |
55d83cdf | 652 | slp_compat_nodes_map_t * /* compat_cache */, |
0c18faac | 653 | slp_tree *node, vec<slp_tree> *ops) |
3ed472af TC |
654 | { |
655 | internal_fn ifn = IFN_LAST; | |
656 | ||
657 | /* Find the two components. Rotation in the complex plane will modify | |
658 | the operations: | |
659 | ||
660 | * Rotation 0: + + | |
661 | * Rotation 90: - + | |
662 | * Rotation 180: - - | |
663 | * Rotation 270: + - | |
664 | ||
665 | Rotation 0 and 180 can be handled by normal SIMD code, so we don't need | |
666 | to care about them here. */ | |
667 | if (op == MINUS_PLUS) | |
668 | ifn = IFN_COMPLEX_ADD_ROT90; | |
669 | else if (op == PLUS_MINUS) | |
670 | ifn = IFN_COMPLEX_ADD_ROT270; | |
671 | else | |
672 | return ifn; | |
673 | ||
674 | /* verify that there is a permute, otherwise this isn't a pattern we | |
675 | we support. */ | |
676 | gcc_assert (ops->length () == 2); | |
677 | ||
678 | vec<slp_tree> children = SLP_TREE_CHILDREN ((*ops)[0]); | |
679 | ||
680 | /* First node must be unpermuted. */ | |
c3a2bc6d | 681 | if (linear_loads_p (perm_cache, children[0]) != PERM_EVENODD) |
3ed472af TC |
682 | return IFN_LAST; |
683 | ||
684 | /* Second node must be permuted. */ | |
c3a2bc6d | 685 | if (linear_loads_p (perm_cache, children[1]) != PERM_ODDEVEN) |
3ed472af TC |
686 | return IFN_LAST; |
687 | ||
0c18faac TC |
688 | if (!vect_pattern_validate_optab (ifn, *node)) |
689 | return IFN_LAST; | |
690 | ||
3ed472af TC |
691 | return ifn; |
692 | } | |
693 | ||
694 | /* Attempt to recognize a complex add pattern. */ | |
695 | ||
696 | vect_pattern* | |
697 | complex_add_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache, | |
55d83cdf | 698 | slp_compat_nodes_map_t *compat_cache, |
3ed472af TC |
699 | slp_tree *node) |
700 | { | |
701 | auto_vec<slp_tree> ops; | |
702 | complex_operation_t op | |
703 | = vect_detect_pair_op (*node, true, &ops); | |
0c18faac | 704 | internal_fn ifn |
55d83cdf | 705 | = complex_add_pattern::matches (op, perm_cache, compat_cache, node, &ops); |
0c18faac | 706 | if (ifn == IFN_LAST) |
3ed472af TC |
707 | return NULL; |
708 | ||
709 | return new complex_add_pattern (node, &ops, ifn); | |
710 | } | |
711 | ||
e09173d8 TC |
712 | /******************************************************************************* |
713 | * complex_mul_pattern | |
714 | ******************************************************************************/ | |
715 | ||
55d83cdf | 716 | /* Helper function to check if PERM is KIND or PERM_TOP. */ |
e09173d8 TC |
717 | |
718 | static inline bool | |
55d83cdf TC |
719 | is_eq_or_top (slp_tree_to_load_perm_map_t *perm_cache, |
720 | slp_tree op1, complex_perm_kinds_t kind1, | |
721 | slp_tree op2, complex_perm_kinds_t kind2) | |
e09173d8 | 722 | { |
55d83cdf TC |
723 | complex_perm_kinds_t perm1 = linear_loads_p (perm_cache, op1); |
724 | if (perm1 != kind1 && perm1 != PERM_TOP) | |
725 | return false; | |
e09173d8 | 726 | |
55d83cdf TC |
727 | complex_perm_kinds_t perm2 = linear_loads_p (perm_cache, op2); |
728 | if (perm2 != kind2 && perm2 != PERM_TOP) | |
729 | return false; | |
e09173d8 | 730 | |
55d83cdf | 731 | return true; |
e09173d8 TC |
732 | } |
733 | ||
55d83cdf | 734 | enum _conj_status { CONJ_NONE, CONJ_FST, CONJ_SND }; |
e09173d8 TC |
735 | |
736 | static inline bool | |
55d83cdf TC |
737 | compatible_complex_nodes_p (slp_compat_nodes_map_t *compat_cache, |
738 | slp_tree a, int *pa, slp_tree b, int *pb) | |
e09173d8 | 739 | { |
55d83cdf TC |
740 | bool *tmp; |
741 | std::pair<slp_tree, slp_tree> key = std::make_pair(a, b); | |
742 | if ((tmp = compat_cache->get (key)) != NULL) | |
743 | return *tmp; | |
e09173d8 | 744 | |
55d83cdf | 745 | compat_cache->put (key, false); |
e09173d8 | 746 | |
55d83cdf TC |
747 | if (SLP_TREE_CHILDREN (a).length () != SLP_TREE_CHILDREN (b).length ()) |
748 | return false; | |
e09173d8 | 749 | |
55d83cdf TC |
750 | if (SLP_TREE_DEF_TYPE (a) != SLP_TREE_DEF_TYPE (b)) |
751 | return false; | |
752 | ||
753 | /* Only internal nodes can be loads, as such we can't check further if they | |
754 | are externals. */ | |
755 | if (SLP_TREE_DEF_TYPE (a) != vect_internal_def) | |
e09173d8 | 756 | { |
55d83cdf TC |
757 | for (unsigned i = 0; i < SLP_TREE_SCALAR_OPS (a).length (); i++) |
758 | { | |
759 | tree op1 = SLP_TREE_SCALAR_OPS (a)[pa[i % 2]]; | |
760 | tree op2 = SLP_TREE_SCALAR_OPS (b)[pb[i % 2]]; | |
761 | if (!operand_equal_p (op1, op2, 0)) | |
762 | return false; | |
763 | } | |
764 | ||
765 | compat_cache->put (key, true); | |
766 | return true; | |
e09173d8 | 767 | } |
55d83cdf TC |
768 | |
769 | auto a_stmt = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (a)); | |
770 | auto b_stmt = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (b)); | |
771 | ||
772 | if (gimple_code (a_stmt) != gimple_code (b_stmt)) | |
773 | return false; | |
774 | ||
775 | /* code, children, type, externals, loads, constants */ | |
776 | if (gimple_num_args (a_stmt) != gimple_num_args (b_stmt)) | |
777 | return false; | |
778 | ||
779 | /* At this point, a and b are known to be the same gimple operations. */ | |
780 | if (is_gimple_call (a_stmt)) | |
781 | { | |
782 | if (!compatible_calls_p (dyn_cast <gcall *> (a_stmt), | |
783 | dyn_cast <gcall *> (b_stmt))) | |
784 | return false; | |
785 | } | |
786 | else if (!is_gimple_assign (a_stmt)) | |
787 | return false; | |
e09173d8 TC |
788 | else |
789 | { | |
55d83cdf TC |
790 | tree_code acode = gimple_assign_rhs_code (a_stmt); |
791 | tree_code bcode = gimple_assign_rhs_code (b_stmt); | |
792 | if ((acode == REALPART_EXPR || acode == IMAGPART_EXPR) | |
793 | && (bcode == REALPART_EXPR || bcode == IMAGPART_EXPR)) | |
794 | return true; | |
795 | ||
796 | if (acode != bcode) | |
e09173d8 TC |
797 | return false; |
798 | } | |
799 | ||
55d83cdf TC |
800 | if (!SLP_TREE_LOAD_PERMUTATION (a).exists () |
801 | || !SLP_TREE_LOAD_PERMUTATION (b).exists ()) | |
e09173d8 | 802 | { |
55d83cdf TC |
803 | for (unsigned i = 0; i < gimple_num_args (a_stmt); i++) |
804 | { | |
805 | tree t1 = gimple_arg (a_stmt, i); | |
806 | tree t2 = gimple_arg (b_stmt, i); | |
807 | if (TREE_CODE (t1) != TREE_CODE (t2)) | |
808 | return false; | |
809 | ||
810 | /* If SSA name then we will need to inspect the children | |
811 | so we can punt here. */ | |
812 | if (TREE_CODE (t1) == SSA_NAME) | |
813 | continue; | |
814 | ||
815 | if (!operand_equal_p (t1, t2, 0)) | |
816 | return false; | |
817 | } | |
e09173d8 | 818 | } |
55d83cdf | 819 | else |
e09173d8 | 820 | { |
55d83cdf TC |
821 | auto dr1 = STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (a)); |
822 | auto dr2 = STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (b)); | |
823 | /* Don't check the last dimension as that's checked by the lineary | |
824 | checks. This check is also much stricter than what we need | |
825 | because it doesn't consider loading from adjacent elements | |
826 | in the same struct as loading from the same base object. | |
827 | But for now, I'll play it safe. */ | |
828 | if (!same_data_refs (dr1, dr2, 1)) | |
e09173d8 TC |
829 | return false; |
830 | } | |
55d83cdf TC |
831 | |
832 | for (unsigned i = 0; i < SLP_TREE_CHILDREN (a).length (); i++) | |
487d604b | 833 | { |
55d83cdf TC |
834 | if (!compatible_complex_nodes_p (compat_cache, |
835 | SLP_TREE_CHILDREN (a)[i], pa, | |
836 | SLP_TREE_CHILDREN (b)[i], pb)) | |
487d604b | 837 | return false; |
487d604b | 838 | } |
e09173d8 | 839 | |
55d83cdf | 840 | compat_cache->put (key, true); |
e09173d8 TC |
841 | return true; |
842 | } | |
843 | ||
e09173d8 TC |
844 | static inline bool |
845 | vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache, | |
55d83cdf TC |
846 | slp_compat_nodes_map_t *compat_cache, |
847 | vec<slp_tree> &left_op, | |
848 | vec<slp_tree> &right_op, | |
849 | bool subtract, | |
850 | enum _conj_status *_status) | |
e09173d8 | 851 | { |
55d83cdf TC |
852 | auto_vec<slp_tree> ops; |
853 | enum _conj_status stats = CONJ_NONE; | |
854 | ||
855 | /* The complex operations can occur in two layouts and two permute sequences | |
856 | so declare them and re-use them. */ | |
857 | int styles[][4] = { { 0, 2, 1, 3} /* {L1, R1} + {L2, R2}. */ | |
858 | , { 0, 3, 1, 2} /* {L1, R2} + {L2, R1}. */ | |
859 | }; | |
860 | ||
861 | /* Now for the corresponding permutes that go with these values. */ | |
862 | complex_perm_kinds_t perms[][4] | |
863 | = { { PERM_EVENEVEN, PERM_ODDODD, PERM_EVENODD, PERM_ODDEVEN } | |
864 | , { PERM_EVENODD, PERM_ODDEVEN, PERM_EVENEVEN, PERM_ODDODD } | |
865 | }; | |
866 | ||
867 | /* These permutes are used during comparisons of externals on which | |
868 | we require strict equality. */ | |
869 | int cq[][4][2] | |
870 | = { { { 0, 0 }, { 1, 1 }, { 0, 1 }, { 1, 0 } } | |
871 | , { { 0, 1 }, { 1, 0 }, { 0, 0 }, { 1, 1 } } | |
872 | }; | |
873 | ||
874 | /* Default to style and perm 0, most operations use this one. */ | |
875 | int style = 0; | |
876 | int perm = subtract ? 1 : 0; | |
877 | ||
878 | /* Check if we have a negate operation, if so absorb the node and continue | |
879 | looking. */ | |
880 | bool neg0 = vect_match_expression_p (right_op[0], NEGATE_EXPR); | |
881 | bool neg1 = vect_match_expression_p (right_op[1], NEGATE_EXPR); | |
882 | ||
883 | /* Determine which style we're looking at. We only have different ones | |
884 | whenever a conjugate is involved. */ | |
885 | if (neg0 && neg1) | |
886 | ; | |
887 | else if (neg0) | |
e09173d8 | 888 | { |
55d83cdf TC |
889 | right_op[0] = SLP_TREE_CHILDREN (right_op[0])[0]; |
890 | stats = CONJ_FST; | |
891 | if (subtract) | |
892 | perm = 0; | |
e09173d8 | 893 | } |
55d83cdf TC |
894 | else if (neg1) |
895 | { | |
896 | right_op[1] = SLP_TREE_CHILDREN (right_op[1])[0]; | |
897 | stats = CONJ_SND; | |
898 | perm = 1; | |
899 | } | |
900 | ||
901 | *_status = stats; | |
902 | ||
903 | /* Flatten the inputs after we've remapped them. */ | |
904 | ops.create (4); | |
905 | ops.safe_splice (left_op); | |
906 | ops.safe_splice (right_op); | |
907 | ||
908 | /* Extract out the elements to check. */ | |
909 | slp_tree op0 = ops[styles[style][0]]; | |
910 | slp_tree op1 = ops[styles[style][1]]; | |
911 | slp_tree op2 = ops[styles[style][2]]; | |
912 | slp_tree op3 = ops[styles[style][3]]; | |
913 | ||
914 | /* Do cheapest test first. If failed no need to analyze further. */ | |
915 | if (linear_loads_p (perm_cache, op0) != perms[perm][0] | |
916 | || linear_loads_p (perm_cache, op1) != perms[perm][1] | |
917 | || !is_eq_or_top (perm_cache, op2, perms[perm][2], op3, perms[perm][3])) | |
918 | return false; | |
919 | ||
920 | return compatible_complex_nodes_p (compat_cache, op0, cq[perm][0], op1, | |
921 | cq[perm][1]) | |
922 | && compatible_complex_nodes_p (compat_cache, op2, cq[perm][2], op3, | |
923 | cq[perm][3]); | |
e09173d8 TC |
924 | } |
925 | ||
926 | /* This function combines two nodes containing only even and only odd lanes | |
927 | together into a single node which contains the nodes in even/odd order | |
928 | by using a lane permute. | |
929 | ||
930 | The lanes in EVEN and ODD are duplicated 2 times inside the vectors. | |
931 | So for a lanes = 4 EVEN contains {EVEN1, EVEN1, EVEN2, EVEN2}. | |
932 | ||
933 | The tree REPRESENTATION is taken from the supplied REP along with the | |
934 | vectype which must be the same between all three nodes. | |
935 | */ | |
936 | ||
937 | static slp_tree | |
938 | vect_build_combine_node (slp_tree even, slp_tree odd, slp_tree rep) | |
939 | { | |
940 | vec<std::pair<unsigned, unsigned> > perm; | |
941 | perm.create (SLP_TREE_LANES (rep)); | |
942 | ||
943 | for (unsigned x = 0; x < SLP_TREE_LANES (rep); x+=2) | |
944 | { | |
945 | perm.quick_push (std::make_pair (0, x)); | |
946 | perm.quick_push (std::make_pair (1, x+1)); | |
947 | } | |
948 | ||
949 | slp_tree vnode = vect_create_new_slp_node (2, SLP_TREE_CODE (even)); | |
950 | SLP_TREE_CODE (vnode) = VEC_PERM_EXPR; | |
951 | SLP_TREE_LANE_PERMUTATION (vnode) = perm; | |
952 | ||
953 | SLP_TREE_CHILDREN (vnode).create (2); | |
954 | SLP_TREE_CHILDREN (vnode).quick_push (even); | |
955 | SLP_TREE_CHILDREN (vnode).quick_push (odd); | |
956 | SLP_TREE_REF_COUNT (even)++; | |
957 | SLP_TREE_REF_COUNT (odd)++; | |
958 | SLP_TREE_REF_COUNT (vnode) = 1; | |
959 | ||
960 | SLP_TREE_LANES (vnode) = SLP_TREE_LANES (rep); | |
961 | gcc_assert (perm.length () == SLP_TREE_LANES (vnode)); | |
962 | /* Representation is set to that of the current node as the vectorizer | |
963 | can't deal with VEC_PERMs with no representation, as would be the | |
964 | case with invariants. */ | |
965 | SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (rep); | |
966 | SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (rep); | |
967 | return vnode; | |
968 | } | |
969 | ||
970 | class complex_mul_pattern : public complex_pattern | |
971 | { | |
972 | protected: | |
973 | complex_mul_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn) | |
974 | : complex_pattern (node, m_ops, ifn) | |
975 | { | |
976 | this->m_num_args = 2; | |
977 | } | |
978 | ||
979 | public: | |
4665cfbc | 980 | void build (vec_info *) final override; |
e09173d8 | 981 | static internal_fn |
55d83cdf TC |
982 | matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, |
983 | slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *); | |
e09173d8 TC |
984 | |
985 | static vect_pattern* | |
55d83cdf TC |
986 | recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *, |
987 | slp_tree *); | |
e09173d8 TC |
988 | |
989 | static vect_pattern* | |
990 | mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn) | |
991 | { | |
992 | return new complex_mul_pattern (node, m_ops, ifn); | |
993 | } | |
994 | ||
995 | }; | |
996 | ||
ed3de62a TC |
997 | /* Pattern matcher for trying to match complex multiply and complex multiply |
998 | and accumulate pattern in SLP tree. If the operation matches then IFN | |
999 | is set to the operation it matched and the arguments to the two | |
1000 | replacement statements are put in m_ops. | |
e09173d8 TC |
1001 | |
1002 | If no match is found then IFN is set to IFN_LAST and m_ops is unchanged. | |
1003 | ||
1004 | This function matches the patterns shaped as: | |
1005 | ||
1006 | double ax = (b[i+1] * a[i]); | |
1007 | double bx = (a[i+1] * b[i]); | |
1008 | ||
1009 | c[i] = c[i] - ax; | |
1010 | c[i+1] = c[i+1] + bx; | |
1011 | ||
1012 | If a match occurred then TRUE is returned, else FALSE. The initial match is | |
1013 | expected to be in OP1 and the initial match operands in args0. */ | |
1014 | ||
1015 | internal_fn | |
1016 | complex_mul_pattern::matches (complex_operation_t op, | |
1017 | slp_tree_to_load_perm_map_t *perm_cache, | |
55d83cdf | 1018 | slp_compat_nodes_map_t *compat_cache, |
e09173d8 TC |
1019 | slp_tree *node, vec<slp_tree> *ops) |
1020 | { | |
1021 | internal_fn ifn = IFN_LAST; | |
1022 | ||
1023 | if (op != MINUS_PLUS) | |
1024 | return IFN_LAST; | |
1025 | ||
ed3de62a TC |
1026 | auto childs = *ops; |
1027 | auto l0node = SLP_TREE_CHILDREN (childs[0]); | |
ed3de62a TC |
1028 | |
1029 | bool mul0 = vect_match_expression_p (l0node[0], MULT_EXPR); | |
1030 | bool mul1 = vect_match_expression_p (l0node[1], MULT_EXPR); | |
487d604b | 1031 | if (!mul0 && !mul1) |
e09173d8 TC |
1032 | return IFN_LAST; |
1033 | ||
1034 | /* Now operand2+4 may lead to another expression. */ | |
1035 | auto_vec<slp_tree> left_op, right_op; | |
ed3de62a TC |
1036 | slp_tree add0 = NULL; |
1037 | ||
1038 | /* Check if we may be a multiply add. */ | |
1039 | if (!mul0 | |
1040 | && vect_match_expression_p (l0node[0], PLUS_EXPR)) | |
1041 | { | |
1042 | auto vals = SLP_TREE_CHILDREN (l0node[0]); | |
1043 | /* Check if it's a multiply, otherwise no idea what this is. */ | |
487d604b | 1044 | if (!(mul0 = vect_match_expression_p (vals[1], MULT_EXPR))) |
ed3de62a | 1045 | return IFN_LAST; |
e09173d8 | 1046 | |
ed3de62a TC |
1047 | /* Check if the ADD is linear, otherwise it's not valid complex FMA. */ |
1048 | if (linear_loads_p (perm_cache, vals[0]) != PERM_EVENODD) | |
1049 | return IFN_LAST; | |
1050 | ||
1051 | left_op.safe_splice (SLP_TREE_CHILDREN (vals[1])); | |
1052 | add0 = vals[0]; | |
1053 | } | |
1054 | else | |
1055 | left_op.safe_splice (SLP_TREE_CHILDREN (l0node[0])); | |
1056 | ||
1057 | right_op.safe_splice (SLP_TREE_CHILDREN (l0node[1])); | |
1058 | ||
1059 | if (left_op.length () != 2 | |
1060 | || right_op.length () != 2 | |
487d604b TC |
1061 | || !mul0 |
1062 | || !mul1 | |
ed3de62a | 1063 | || linear_loads_p (perm_cache, left_op[1]) == PERM_ODDEVEN) |
e09173d8 TC |
1064 | return IFN_LAST; |
1065 | ||
55d83cdf TC |
1066 | enum _conj_status status; |
1067 | if (!vect_validate_multiplication (perm_cache, compat_cache, left_op, | |
1068 | right_op, false, &status)) | |
1069 | return IFN_LAST; | |
e09173d8 | 1070 | |
55d83cdf | 1071 | if (status == CONJ_NONE) |
e09173d8 | 1072 | { |
487d604b | 1073 | if (add0) |
ed3de62a TC |
1074 | ifn = IFN_COMPLEX_FMA; |
1075 | else | |
1076 | ifn = IFN_COMPLEX_MUL; | |
e09173d8 | 1077 | } |
ed3de62a | 1078 | else |
e09173d8 | 1079 | { |
487d604b | 1080 | if(add0) |
ed3de62a TC |
1081 | ifn = IFN_COMPLEX_FMA_CONJ; |
1082 | else | |
1083 | ifn = IFN_COMPLEX_MUL_CONJ; | |
e09173d8 TC |
1084 | } |
1085 | ||
1086 | if (!vect_pattern_validate_optab (ifn, *node)) | |
1087 | return IFN_LAST; | |
1088 | ||
1089 | ops->truncate (0); | |
ed3de62a TC |
1090 | ops->create (add0 ? 4 : 3); |
1091 | ||
1092 | if (add0) | |
1093 | ops->quick_push (add0); | |
e09173d8 | 1094 | |
c3a2bc6d | 1095 | complex_perm_kinds_t kind = linear_loads_p (perm_cache, left_op[0]); |
55d83cdf | 1096 | if (kind == PERM_EVENODD || kind == PERM_TOP) |
e09173d8 TC |
1097 | { |
1098 | ops->quick_push (left_op[1]); | |
1099 | ops->quick_push (right_op[1]); | |
1100 | ops->quick_push (left_op[0]); | |
1101 | } | |
55d83cdf | 1102 | else if (kind == PERM_EVENEVEN && status != CONJ_SND) |
e09173d8 TC |
1103 | { |
1104 | ops->quick_push (left_op[0]); | |
1105 | ops->quick_push (right_op[0]); | |
1106 | ops->quick_push (left_op[1]); | |
1107 | } | |
1108 | else | |
1109 | { | |
1110 | ops->quick_push (left_op[0]); | |
1111 | ops->quick_push (right_op[1]); | |
1112 | ops->quick_push (left_op[1]); | |
1113 | } | |
1114 | ||
1115 | return ifn; | |
1116 | } | |
1117 | ||
1118 | /* Attempt to recognize a complex mul pattern. */ | |
1119 | ||
1120 | vect_pattern* | |
1121 | complex_mul_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache, | |
55d83cdf | 1122 | slp_compat_nodes_map_t *compat_cache, |
e09173d8 TC |
1123 | slp_tree *node) |
1124 | { | |
1125 | auto_vec<slp_tree> ops; | |
1126 | complex_operation_t op | |
1127 | = vect_detect_pair_op (*node, true, &ops); | |
1128 | internal_fn ifn | |
55d83cdf | 1129 | = complex_mul_pattern::matches (op, perm_cache, compat_cache, node, &ops); |
e09173d8 TC |
1130 | if (ifn == IFN_LAST) |
1131 | return NULL; | |
1132 | ||
1133 | return new complex_mul_pattern (node, &ops, ifn); | |
1134 | } | |
1135 | ||
1136 | /* Perform a replacement of the detected complex mul pattern with the new | |
1137 | instruction sequences. */ | |
1138 | ||
1139 | void | |
1140 | complex_mul_pattern::build (vec_info *vinfo) | |
1141 | { | |
1142 | slp_tree node; | |
1143 | unsigned i; | |
ed3de62a TC |
1144 | switch (this->m_ifn) |
1145 | { | |
1146 | case IFN_COMPLEX_MUL: | |
1147 | case IFN_COMPLEX_MUL_CONJ: | |
1148 | { | |
1149 | slp_tree newnode | |
1150 | = vect_build_combine_node (this->m_ops[0], this->m_ops[1], | |
1151 | *this->m_node); | |
1152 | SLP_TREE_REF_COUNT (this->m_ops[2])++; | |
1153 | ||
1154 | FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node) | |
1155 | vect_free_slp_tree (node); | |
1156 | ||
1157 | /* First re-arrange the children. */ | |
1158 | SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2); | |
1159 | SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[2]; | |
1160 | SLP_TREE_CHILDREN (*this->m_node)[1] = newnode; | |
1161 | break; | |
1162 | } | |
1163 | case IFN_COMPLEX_FMA: | |
1164 | case IFN_COMPLEX_FMA_CONJ: | |
1165 | { | |
1166 | SLP_TREE_REF_COUNT (this->m_ops[0])++; | |
1167 | slp_tree newnode | |
1168 | = vect_build_combine_node (this->m_ops[1], this->m_ops[2], | |
1169 | *this->m_node); | |
1170 | SLP_TREE_REF_COUNT (this->m_ops[3])++; | |
1171 | ||
1172 | FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node) | |
1173 | vect_free_slp_tree (node); | |
1174 | ||
1175 | /* First re-arrange the children. */ | |
1176 | SLP_TREE_CHILDREN (*this->m_node).safe_grow (3); | |
55d83cdf TC |
1177 | SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[3]; |
1178 | SLP_TREE_CHILDREN (*this->m_node)[1] = newnode; | |
1179 | SLP_TREE_CHILDREN (*this->m_node)[2] = this->m_ops[0]; | |
ed3de62a TC |
1180 | |
1181 | /* Tell the builder to expect an extra argument. */ | |
1182 | this->m_num_args++; | |
1183 | break; | |
1184 | } | |
1185 | default: | |
1186 | gcc_unreachable (); | |
1187 | } | |
e09173d8 TC |
1188 | |
1189 | /* And then rewrite the node itself. */ | |
1190 | complex_pattern::build (vinfo); | |
1191 | } | |
1192 | ||
478e571a TC |
1193 | /******************************************************************************* |
1194 | * complex_fms_pattern class | |
1195 | ******************************************************************************/ | |
1196 | ||
1197 | class complex_fms_pattern : public complex_pattern | |
1198 | { | |
1199 | protected: | |
1200 | complex_fms_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn) | |
1201 | : complex_pattern (node, m_ops, ifn) | |
1202 | { | |
1203 | this->m_num_args = 3; | |
1204 | } | |
1205 | ||
1206 | public: | |
4665cfbc | 1207 | void build (vec_info *) final override; |
478e571a | 1208 | static internal_fn |
55d83cdf TC |
1209 | matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, |
1210 | slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *); | |
478e571a TC |
1211 | |
1212 | static vect_pattern* | |
55d83cdf TC |
1213 | recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *, |
1214 | slp_tree *); | |
478e571a TC |
1215 | |
1216 | static vect_pattern* | |
1217 | mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn) | |
1218 | { | |
1219 | return new complex_fms_pattern (node, m_ops, ifn); | |
1220 | } | |
1221 | }; | |
1222 | ||
1223 | ||
ed3de62a TC |
1224 | /* Pattern matcher for trying to match complex multiply and subtract pattern |
1225 | in SLP tree. If the operation matches then IFN is set to the operation | |
1226 | it matched and the arguments to the two replacement statements are put in | |
1227 | m_ops. | |
478e571a TC |
1228 | |
1229 | If no match is found then IFN is set to IFN_LAST and m_ops is unchanged. | |
1230 | ||
1231 | This function matches the patterns shaped as: | |
1232 | ||
1233 | double ax = (b[i+1] * a[i]) + (b[i] * a[i]); | |
1234 | double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]); | |
1235 | ||
1236 | c[i] = c[i] - ax; | |
1237 | c[i+1] = c[i+1] + bx; | |
1238 | ||
1239 | If a match occurred then TRUE is returned, else FALSE. The initial match is | |
1240 | expected to be in OP1 and the initial match operands in args0. */ | |
1241 | ||
1242 | internal_fn | |
1243 | complex_fms_pattern::matches (complex_operation_t op, | |
1244 | slp_tree_to_load_perm_map_t *perm_cache, | |
55d83cdf | 1245 | slp_compat_nodes_map_t *compat_cache, |
478e571a TC |
1246 | slp_tree * ref_node, vec<slp_tree> *ops) |
1247 | { | |
1248 | internal_fn ifn = IFN_LAST; | |
1249 | ||
478e571a TC |
1250 | /* We need to ignore the two_operands nodes that may also match, |
1251 | for that we can check if they have any scalar statements and also | |
1252 | check that it's not a permute node as we're looking for a normal | |
ed3de62a TC |
1253 | MINUS_EXPR operation. */ |
1254 | if (op != CMPLX_NONE) | |
478e571a TC |
1255 | return IFN_LAST; |
1256 | ||
ed3de62a TC |
1257 | slp_tree root = *ref_node; |
1258 | if (!vect_match_expression_p (root, MINUS_EXPR)) | |
478e571a TC |
1259 | return IFN_LAST; |
1260 | ||
55d83cdf TC |
1261 | /* TODO: Support invariants here, with the new layout CADD now |
1262 | can match before we get a chance to try CFMS. */ | |
ed3de62a TC |
1263 | auto nodes = SLP_TREE_CHILDREN (root); |
1264 | if (!vect_match_expression_p (nodes[1], MULT_EXPR) | |
1265 | || vect_detect_pair_op (nodes[0]) != PLUS_MINUS) | |
478e571a TC |
1266 | return IFN_LAST; |
1267 | ||
ed3de62a TC |
1268 | auto childs = SLP_TREE_CHILDREN (nodes[0]); |
1269 | auto l0node = SLP_TREE_CHILDREN (childs[0]); | |
ed3de62a | 1270 | |
478e571a TC |
1271 | /* Now operand2+4 may lead to another expression. */ |
1272 | auto_vec<slp_tree> left_op, right_op; | |
ed3de62a TC |
1273 | left_op.safe_splice (SLP_TREE_CHILDREN (l0node[1])); |
1274 | right_op.safe_splice (SLP_TREE_CHILDREN (nodes[1])); | |
478e571a | 1275 | |
6cc8aa65 TC |
1276 | /* If these nodes don't have any children then they're |
1277 | not ones we're interested in. */ | |
4f0a2f5a TC |
1278 | if (left_op.length () != 2 |
1279 | || right_op.length () != 2 | |
1280 | || !vect_match_expression_p (l0node[1], MULT_EXPR)) | |
6cc8aa65 TC |
1281 | return IFN_LAST; |
1282 | ||
55d83cdf TC |
1283 | enum _conj_status status; |
1284 | if (!vect_validate_multiplication (perm_cache, compat_cache, right_op, | |
1285 | left_op, true, &status)) | |
478e571a TC |
1286 | return IFN_LAST; |
1287 | ||
55d83cdf | 1288 | if (status == CONJ_NONE) |
478e571a | 1289 | ifn = IFN_COMPLEX_FMS; |
55d83cdf | 1290 | else |
478e571a TC |
1291 | ifn = IFN_COMPLEX_FMS_CONJ; |
1292 | ||
1293 | if (!vect_pattern_validate_optab (ifn, *ref_node)) | |
1294 | return IFN_LAST; | |
1295 | ||
1296 | ops->truncate (0); | |
1297 | ops->create (4); | |
1298 | ||
c3a2bc6d | 1299 | complex_perm_kinds_t kind = linear_loads_p (perm_cache, right_op[0]); |
478e571a TC |
1300 | if (kind == PERM_EVENODD) |
1301 | { | |
ed3de62a | 1302 | ops->quick_push (l0node[0]); |
478e571a TC |
1303 | ops->quick_push (right_op[0]); |
1304 | ops->quick_push (right_op[1]); | |
1305 | ops->quick_push (left_op[1]); | |
1306 | } | |
478e571a TC |
1307 | else |
1308 | { | |
ed3de62a | 1309 | ops->quick_push (l0node[0]); |
478e571a TC |
1310 | ops->quick_push (right_op[1]); |
1311 | ops->quick_push (right_op[0]); | |
55d83cdf | 1312 | ops->quick_push (left_op[0]); |
478e571a TC |
1313 | } |
1314 | ||
1315 | return ifn; | |
1316 | } | |
1317 | ||
1318 | /* Attempt to recognize a complex mul pattern. */ | |
1319 | ||
1320 | vect_pattern* | |
1321 | complex_fms_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache, | |
55d83cdf | 1322 | slp_compat_nodes_map_t *compat_cache, |
478e571a TC |
1323 | slp_tree *node) |
1324 | { | |
1325 | auto_vec<slp_tree> ops; | |
1326 | complex_operation_t op | |
1327 | = vect_detect_pair_op (*node, true, &ops); | |
1328 | internal_fn ifn | |
55d83cdf | 1329 | = complex_fms_pattern::matches (op, perm_cache, compat_cache, node, &ops); |
478e571a TC |
1330 | if (ifn == IFN_LAST) |
1331 | return NULL; | |
1332 | ||
1333 | return new complex_fms_pattern (node, &ops, ifn); | |
1334 | } | |
1335 | ||
1336 | /* Perform a replacement of the detected complex mul pattern with the new | |
1337 | instruction sequences. */ | |
1338 | ||
1339 | void | |
1340 | complex_fms_pattern::build (vec_info *vinfo) | |
1341 | { | |
1342 | slp_tree node; | |
1343 | unsigned i; | |
5296bd57 TC |
1344 | slp_tree newnode = |
1345 | vect_build_combine_node (this->m_ops[2], this->m_ops[3], *this->m_node); | |
1346 | SLP_TREE_REF_COUNT (this->m_ops[0])++; | |
1347 | SLP_TREE_REF_COUNT (this->m_ops[1])++; | |
1348 | ||
478e571a TC |
1349 | FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node) |
1350 | vect_free_slp_tree (node); | |
1351 | ||
1352 | SLP_TREE_CHILDREN (*this->m_node).release (); | |
1353 | SLP_TREE_CHILDREN (*this->m_node).create (3); | |
1354 | ||
1355 | /* First re-arrange the children. */ | |
478e571a | 1356 | SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[1]); |
5296bd57 | 1357 | SLP_TREE_CHILDREN (*this->m_node).quick_push (newnode); |
55d83cdf | 1358 | SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[0]); |
478e571a TC |
1359 | |
1360 | /* And then rewrite the node itself. */ | |
1361 | complex_pattern::build (vinfo); | |
1362 | } | |
1363 | ||
b50df1e7 TC |
1364 | /******************************************************************************* |
1365 | * complex_operations_pattern class | |
1366 | ******************************************************************************/ | |
1367 | ||
1368 | /* This function combines all the existing pattern matchers above into one class | |
1369 | that shares the functionality between them. The initial match is shared | |
1370 | between all complex operations. */ | |
1371 | ||
1372 | class complex_operations_pattern : public complex_pattern | |
1373 | { | |
1374 | protected: | |
1375 | complex_operations_pattern (slp_tree *node, vec<slp_tree> *m_ops, | |
1376 | internal_fn ifn) | |
1377 | : complex_pattern (node, m_ops, ifn) | |
1378 | { | |
1379 | this->m_num_args = 0; | |
1380 | } | |
1381 | ||
1382 | public: | |
4665cfbc | 1383 | void build (vec_info *) final override; |
b50df1e7 | 1384 | static internal_fn |
55d83cdf TC |
1385 | matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, |
1386 | slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *); | |
b50df1e7 TC |
1387 | |
1388 | static vect_pattern* | |
55d83cdf TC |
1389 | recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *, |
1390 | slp_tree *); | |
b50df1e7 TC |
1391 | }; |
1392 | ||
1393 | /* Dummy matches implementation for proxy object. */ | |
1394 | ||
1395 | internal_fn | |
1396 | complex_operations_pattern:: | |
1397 | matches (complex_operation_t /* op */, | |
1398 | slp_tree_to_load_perm_map_t * /* perm_cache */, | |
55d83cdf | 1399 | slp_compat_nodes_map_t * /* compat_cache */, |
b50df1e7 TC |
1400 | slp_tree * /* ref_node */, vec<slp_tree> * /* ops */) |
1401 | { | |
1402 | return IFN_LAST; | |
1403 | } | |
1404 | ||
1405 | /* Attempt to recognize a complex mul pattern. */ | |
1406 | ||
1407 | vect_pattern* | |
1408 | complex_operations_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache, | |
55d83cdf | 1409 | slp_compat_nodes_map_t *ccache, |
b50df1e7 TC |
1410 | slp_tree *node) |
1411 | { | |
1412 | auto_vec<slp_tree> ops; | |
1413 | complex_operation_t op | |
1414 | = vect_detect_pair_op (*node, true, &ops); | |
1415 | internal_fn ifn = IFN_LAST; | |
1416 | ||
55d83cdf | 1417 | ifn = complex_fms_pattern::matches (op, perm_cache, ccache, node, &ops); |
b50df1e7 TC |
1418 | if (ifn != IFN_LAST) |
1419 | return complex_fms_pattern::mkInstance (node, &ops, ifn); | |
1420 | ||
55d83cdf | 1421 | ifn = complex_mul_pattern::matches (op, perm_cache, ccache, node, &ops); |
b50df1e7 TC |
1422 | if (ifn != IFN_LAST) |
1423 | return complex_mul_pattern::mkInstance (node, &ops, ifn); | |
1424 | ||
55d83cdf | 1425 | ifn = complex_add_pattern::matches (op, perm_cache, ccache, node, &ops); |
b50df1e7 TC |
1426 | if (ifn != IFN_LAST) |
1427 | return complex_add_pattern::mkInstance (node, &ops, ifn); | |
1428 | ||
1429 | return NULL; | |
1430 | } | |
1431 | ||
1432 | /* Dummy implementation of build. */ | |
1433 | ||
1434 | void | |
1435 | complex_operations_pattern::build (vec_info * /* vinfo */) | |
1436 | { | |
1437 | gcc_unreachable (); | |
1438 | } | |
1439 | ||
7a6c31f0 RB |
1440 | |
1441 | /* The addsub_pattern. */ | |
1442 | ||
1443 | class addsub_pattern : public vect_pattern | |
1444 | { | |
1445 | public: | |
7d810646 RB |
1446 | addsub_pattern (slp_tree *node, internal_fn ifn) |
1447 | : vect_pattern (node, NULL, ifn) {}; | |
7a6c31f0 | 1448 | |
4665cfbc | 1449 | void build (vec_info *) final override; |
7a6c31f0 RB |
1450 | |
1451 | static vect_pattern* | |
55d83cdf TC |
1452 | recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *, |
1453 | slp_tree *); | |
7a6c31f0 RB |
1454 | }; |
1455 | ||
1456 | vect_pattern * | |
55d83cdf TC |
1457 | addsub_pattern::recognize (slp_tree_to_load_perm_map_t *, |
1458 | slp_compat_nodes_map_t *, slp_tree *node_) | |
7a6c31f0 RB |
1459 | { |
1460 | slp_tree node = *node_; | |
1461 | if (SLP_TREE_CODE (node) != VEC_PERM_EXPR | |
7d810646 RB |
1462 | || SLP_TREE_CHILDREN (node).length () != 2 |
1463 | || SLP_TREE_LANE_PERMUTATION (node).length () % 2) | |
7a6c31f0 RB |
1464 | return NULL; |
1465 | ||
1466 | /* Match a blend of a plus and a minus op with the same number of plus and | |
1467 | minus lanes on the same operands. */ | |
7d810646 RB |
1468 | unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first; |
1469 | unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first; | |
1470 | if (l0 == l1) | |
1471 | return NULL; | |
1472 | bool l0add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], | |
1473 | PLUS_EXPR); | |
1474 | if (!l0add_p | |
1475 | && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], MINUS_EXPR)) | |
1476 | return NULL; | |
1477 | bool l1add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], | |
1478 | PLUS_EXPR); | |
1479 | if (!l1add_p | |
1480 | && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], MINUS_EXPR)) | |
7a6c31f0 | 1481 | return NULL; |
7d810646 RB |
1482 | |
1483 | slp_tree l0node = SLP_TREE_CHILDREN (node)[l0]; | |
1484 | slp_tree l1node = SLP_TREE_CHILDREN (node)[l1]; | |
1485 | if (!((SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[0] | |
1486 | && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[1]) | |
1487 | || (SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[1] | |
1488 | && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[0]))) | |
7a6c31f0 RB |
1489 | return NULL; |
1490 | ||
1491 | for (unsigned i = 0; i < SLP_TREE_LANE_PERMUTATION (node).length (); ++i) | |
1492 | { | |
1493 | std::pair<unsigned, unsigned> perm = SLP_TREE_LANE_PERMUTATION (node)[i]; | |
7d810646 | 1494 | /* It has to be alternating -, +, -, |
7a6c31f0 RB |
1495 | While we could permute the .ADDSUB inputs and the .ADDSUB output |
1496 | that's only profitable over the add + sub + blend if at least | |
1497 | one of the permute is optimized which we can't determine here. */ | |
7d810646 | 1498 | if (perm.first != ((i & 1) ? l1 : l0) |
7a6c31f0 RB |
1499 | || perm.second != i) |
1500 | return NULL; | |
1501 | } | |
1502 | ||
7d810646 RB |
1503 | /* Now we have either { -, +, -, + ... } (!l0add_p) or { +, -, +, - ... } |
1504 | (l0add_p), see whether we have FMA variants. */ | |
1505 | if (!l0add_p | |
1506 | && vect_match_expression_p (SLP_TREE_CHILDREN (l0node)[0], MULT_EXPR)) | |
1507 | { | |
1508 | /* (c * d) -+ a */ | |
1509 | if (vect_pattern_validate_optab (IFN_VEC_FMADDSUB, node)) | |
1510 | return new addsub_pattern (node_, IFN_VEC_FMADDSUB); | |
1511 | } | |
1512 | else if (l0add_p | |
1513 | && vect_match_expression_p (SLP_TREE_CHILDREN (l1node)[0], MULT_EXPR)) | |
1514 | { | |
1515 | /* (c * d) +- a */ | |
1516 | if (vect_pattern_validate_optab (IFN_VEC_FMSUBADD, node)) | |
1517 | return new addsub_pattern (node_, IFN_VEC_FMSUBADD); | |
1518 | } | |
7a6c31f0 | 1519 | |
7d810646 RB |
1520 | if (!l0add_p && vect_pattern_validate_optab (IFN_VEC_ADDSUB, node)) |
1521 | return new addsub_pattern (node_, IFN_VEC_ADDSUB); | |
1522 | ||
1523 | return NULL; | |
7a6c31f0 RB |
1524 | } |
1525 | ||
1526 | void | |
1527 | addsub_pattern::build (vec_info *vinfo) | |
1528 | { | |
1529 | slp_tree node = *m_node; | |
1530 | ||
7d810646 RB |
1531 | unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first; |
1532 | unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first; | |
1533 | ||
1534 | switch (m_ifn) | |
1535 | { | |
1536 | case IFN_VEC_ADDSUB: | |
1537 | { | |
1538 | slp_tree sub = SLP_TREE_CHILDREN (node)[l0]; | |
1539 | slp_tree add = SLP_TREE_CHILDREN (node)[l1]; | |
1540 | ||
1541 | /* Modify the blend node in-place. */ | |
1542 | SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (sub)[0]; | |
1543 | SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (sub)[1]; | |
1544 | SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++; | |
1545 | SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++; | |
1546 | ||
1547 | /* Build IFN_VEC_ADDSUB from the sub representative operands. */ | |
1548 | stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (sub); | |
1549 | gcall *call = gimple_build_call_internal (IFN_VEC_ADDSUB, 2, | |
1550 | gimple_assign_rhs1 (rep->stmt), | |
1551 | gimple_assign_rhs2 (rep->stmt)); | |
1552 | gimple_call_set_lhs (call, make_ssa_name | |
1553 | (TREE_TYPE (gimple_assign_lhs (rep->stmt)))); | |
1554 | gimple_call_set_nothrow (call, true); | |
1555 | gimple_set_bb (call, gimple_bb (rep->stmt)); | |
1556 | stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, rep); | |
1557 | SLP_TREE_REPRESENTATIVE (node) = new_rep; | |
1558 | STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope; | |
1559 | STMT_SLP_TYPE (new_rep) = pure_slp; | |
1560 | STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node); | |
1561 | STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true; | |
1562 | STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (rep)); | |
1563 | SLP_TREE_CODE (node) = ERROR_MARK; | |
1564 | SLP_TREE_LANE_PERMUTATION (node).release (); | |
1565 | ||
1566 | vect_free_slp_tree (sub); | |
1567 | vect_free_slp_tree (add); | |
1568 | break; | |
1569 | } | |
1570 | case IFN_VEC_FMADDSUB: | |
1571 | case IFN_VEC_FMSUBADD: | |
1572 | { | |
1573 | slp_tree sub, add; | |
1574 | if (m_ifn == IFN_VEC_FMADDSUB) | |
1575 | { | |
1576 | sub = SLP_TREE_CHILDREN (node)[l0]; | |
1577 | add = SLP_TREE_CHILDREN (node)[l1]; | |
1578 | } | |
1579 | else /* m_ifn == IFN_VEC_FMSUBADD */ | |
1580 | { | |
1581 | sub = SLP_TREE_CHILDREN (node)[l1]; | |
1582 | add = SLP_TREE_CHILDREN (node)[l0]; | |
1583 | } | |
1584 | slp_tree mul = SLP_TREE_CHILDREN (sub)[0]; | |
1585 | /* Modify the blend node in-place. */ | |
1586 | SLP_TREE_CHILDREN (node).safe_grow (3, true); | |
1587 | SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (mul)[0]; | |
1588 | SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (mul)[1]; | |
1589 | SLP_TREE_CHILDREN (node)[2] = SLP_TREE_CHILDREN (sub)[1]; | |
1590 | SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++; | |
1591 | SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++; | |
1592 | SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[2])++; | |
1593 | ||
1594 | /* Build IFN_VEC_FMADDSUB from the mul/sub representative operands. */ | |
1595 | stmt_vec_info srep = SLP_TREE_REPRESENTATIVE (sub); | |
1596 | stmt_vec_info mrep = SLP_TREE_REPRESENTATIVE (mul); | |
1597 | gcall *call = gimple_build_call_internal (m_ifn, 3, | |
1598 | gimple_assign_rhs1 (mrep->stmt), | |
1599 | gimple_assign_rhs2 (mrep->stmt), | |
1600 | gimple_assign_rhs2 (srep->stmt)); | |
1601 | gimple_call_set_lhs (call, make_ssa_name | |
1602 | (TREE_TYPE (gimple_assign_lhs (srep->stmt)))); | |
1603 | gimple_call_set_nothrow (call, true); | |
1604 | gimple_set_bb (call, gimple_bb (srep->stmt)); | |
1605 | stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, srep); | |
1606 | SLP_TREE_REPRESENTATIVE (node) = new_rep; | |
1607 | STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope; | |
1608 | STMT_SLP_TYPE (new_rep) = pure_slp; | |
1609 | STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node); | |
1610 | STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true; | |
1611 | STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (srep)); | |
1612 | SLP_TREE_CODE (node) = ERROR_MARK; | |
1613 | SLP_TREE_LANE_PERMUTATION (node).release (); | |
1614 | ||
1615 | vect_free_slp_tree (sub); | |
1616 | vect_free_slp_tree (add); | |
1617 | break; | |
1618 | } | |
1619 | default:; | |
1620 | } | |
7a6c31f0 RB |
1621 | } |
1622 | ||
3ed472af TC |
1623 | /******************************************************************************* |
1624 | * Pattern matching definitions | |
1625 | ******************************************************************************/ | |
1626 | ||
1627 | #define SLP_PATTERN(x) &x::recognize | |
1628 | vect_pattern_decl_t slp_patterns[] | |
1629 | { | |
1630 | /* For least amount of back-tracking and more efficient matching | |
1631 | order patterns from the largest to the smallest. Especially if they | |
1632 | overlap in what they can detect. */ | |
1633 | ||
b50df1e7 | 1634 | SLP_PATTERN (complex_operations_pattern), |
7a6c31f0 | 1635 | SLP_PATTERN (addsub_pattern) |
3ed472af TC |
1636 | }; |
1637 | #undef SLP_PATTERN | |
1638 | ||
1639 | /* Set the number of SLP pattern matchers available. */ | |
ca32b29e | 1640 | size_t num__slp_patterns = ARRAY_SIZE (slp_patterns); |