]>
Commit | Line | Data |
---|---|---|
3ed472af | 1 | /* SLP - Pattern matcher on SLP trees |
99dee823 | 2 | Copyright (C) 2020-2021 Free Software Foundation, Inc. |
3ed472af TC |
3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it under | |
7 | the terms of the GNU General Public License as published by the Free | |
8 | Software Foundation; either version 3, or (at your option) any later | |
9 | version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING3. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #include "config.h" | |
21 | #include "system.h" | |
22 | #include "coretypes.h" | |
23 | #include "backend.h" | |
24 | #include "target.h" | |
25 | #include "rtl.h" | |
26 | #include "tree.h" | |
27 | #include "gimple.h" | |
28 | #include "tree-pass.h" | |
29 | #include "ssa.h" | |
30 | #include "optabs-tree.h" | |
31 | #include "insn-config.h" | |
32 | #include "recog.h" /* FIXME: for insn_data */ | |
33 | #include "fold-const.h" | |
34 | #include "stor-layout.h" | |
35 | #include "gimple-iterator.h" | |
36 | #include "cfgloop.h" | |
37 | #include "tree-vectorizer.h" | |
38 | #include "langhooks.h" | |
39 | #include "gimple-walk.h" | |
40 | #include "dbgcnt.h" | |
41 | #include "tree-vector-builder.h" | |
42 | #include "vec-perm-indices.h" | |
43 | #include "gimple-fold.h" | |
44 | #include "internal-fn.h" | |
45 | ||
46 | /* SLP Pattern matching mechanism. | |
47 | ||
48 | This extension to the SLP vectorizer allows one to transform the generated SLP | |
49 | tree based on any pattern. The difference between this and the normal vect | |
50 | pattern matcher is that unlike the former, this matcher allows you to match | |
51 | with instructions that do not belong to the same SSA dominator graph. | |
52 | ||
53 | The only requirement that this pattern matcher has is that you are only | |
54 | only allowed to either match an entire group or none. | |
55 | ||
56 | The pattern matcher currently only allows you to perform replacements to | |
57 | internal functions. | |
58 | ||
59 | Once the patterns are matched it is one way, these cannot be undone. It is | |
60 | currently not supported to match patterns recursively. | |
61 | ||
62 | To add a new pattern, implement the vect_pattern class and add the type to | |
63 | slp_patterns. | |
64 | ||
65 | */ | |
66 | ||
67 | /******************************************************************************* | |
68 | * vect_pattern class | |
69 | ******************************************************************************/ | |
70 | ||
71 | /* Default implementation of recognize that performs matching, validation and | |
72 | replacement of nodes but that can be overriden if required. */ | |
73 | ||
74 | static bool | |
75 | vect_pattern_validate_optab (internal_fn ifn, slp_tree node) | |
76 | { | |
77 | tree vectype = SLP_TREE_VECTYPE (node); | |
78 | if (ifn == IFN_LAST || !vectype) | |
79 | return false; | |
80 | ||
81 | if (dump_enabled_p ()) | |
82 | dump_printf_loc (MSG_NOTE, vect_location, | |
83 | "Found %s pattern in SLP tree\n", | |
84 | internal_fn_name (ifn)); | |
85 | ||
86 | if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED)) | |
87 | { | |
88 | if (dump_enabled_p ()) | |
89 | dump_printf_loc (MSG_NOTE, vect_location, | |
90 | "Target supports %s vectorization with mode %T\n", | |
91 | internal_fn_name (ifn), vectype); | |
92 | } | |
93 | else | |
94 | { | |
95 | if (dump_enabled_p ()) | |
96 | { | |
97 | if (!vectype) | |
98 | dump_printf_loc (MSG_NOTE, vect_location, | |
99 | "Target does not support vector type for %T\n", | |
100 | SLP_TREE_DEF_TYPE (node)); | |
101 | else | |
102 | dump_printf_loc (MSG_NOTE, vect_location, | |
103 | "Target does not support %s for vector type " | |
104 | "%T\n", internal_fn_name (ifn), vectype); | |
105 | } | |
106 | return false; | |
107 | } | |
108 | return true; | |
109 | } | |
110 | ||
111 | /******************************************************************************* | |
112 | * General helper types | |
113 | ******************************************************************************/ | |
114 | ||
115 | /* The COMPLEX_OPERATION enum denotes the possible pair of operations that can | |
116 | be matched when looking for expressions that we are interested matching for | |
117 | complex numbers addition and mla. */ | |
118 | ||
119 | typedef enum _complex_operation : unsigned { | |
120 | PLUS_PLUS, | |
121 | MINUS_PLUS, | |
122 | PLUS_MINUS, | |
123 | MULT_MULT, | |
124 | CMPLX_NONE | |
125 | } complex_operation_t; | |
126 | ||
127 | /******************************************************************************* | |
128 | * General helper functions | |
129 | ******************************************************************************/ | |
130 | ||
131 | /* Helper function of linear_loads_p that checks to see if the load permutation | |
132 | is sequential and in monotonically increasing order of loads with no gaps. | |
133 | */ | |
134 | ||
135 | static inline complex_perm_kinds_t | |
136 | is_linear_load_p (load_permutation_t loads) | |
137 | { | |
138 | if (loads.length() == 0) | |
139 | return PERM_UNKNOWN; | |
140 | ||
141 | unsigned load, i; | |
142 | complex_perm_kinds_t candidates[4] | |
39666d2b | 143 | = { PERM_ODDODD |
3ed472af | 144 | , PERM_EVENEVEN |
39666d2b TC |
145 | , PERM_EVENODD |
146 | , PERM_ODDEVEN | |
3ed472af TC |
147 | }; |
148 | ||
149 | int valid_patterns = 4; | |
39666d2b | 150 | FOR_EACH_VEC_ELT (loads, i, load) |
3ed472af | 151 | { |
39666d2b | 152 | if (candidates[0] != PERM_UNKNOWN && load != 1) |
3ed472af TC |
153 | { |
154 | candidates[0] = PERM_UNKNOWN; | |
155 | valid_patterns--; | |
156 | } | |
39666d2b | 157 | if (candidates[1] != PERM_UNKNOWN && load != 0) |
3ed472af TC |
158 | { |
159 | candidates[1] = PERM_UNKNOWN; | |
160 | valid_patterns--; | |
161 | } | |
39666d2b | 162 | if (candidates[2] != PERM_UNKNOWN && load != i) |
3ed472af TC |
163 | { |
164 | candidates[2] = PERM_UNKNOWN; | |
165 | valid_patterns--; | |
166 | } | |
39666d2b TC |
167 | if (candidates[3] != PERM_UNKNOWN |
168 | && load != (i % 2 == 0 ? i + 1 : i - 1)) | |
3ed472af TC |
169 | { |
170 | candidates[3] = PERM_UNKNOWN; | |
171 | valid_patterns--; | |
172 | } | |
173 | ||
174 | if (valid_patterns == 0) | |
175 | return PERM_UNKNOWN; | |
176 | } | |
177 | ||
178 | for (i = 0; i < sizeof(candidates); i++) | |
179 | if (candidates[i] != PERM_UNKNOWN) | |
180 | return candidates[i]; | |
181 | ||
182 | return PERM_UNKNOWN; | |
183 | } | |
184 | ||
185 | /* Combine complex_perm_kinds A and B into a new permute kind that describes the | |
186 | resulting operation. */ | |
187 | ||
188 | static inline complex_perm_kinds_t | |
189 | vect_merge_perms (complex_perm_kinds_t a, complex_perm_kinds_t b) | |
190 | { | |
191 | if (a == b) | |
192 | return a; | |
193 | ||
194 | if (a == PERM_TOP) | |
195 | return b; | |
196 | ||
197 | if (b == PERM_TOP) | |
198 | return a; | |
199 | ||
200 | return PERM_UNKNOWN; | |
201 | } | |
202 | ||
203 | /* Check to see if all loads rooted in ROOT are linear. Linearity is | |
204 | defined as having no gaps between values loaded. */ | |
205 | ||
c3a2bc6d | 206 | static complex_perm_kinds_t |
3ed472af TC |
207 | linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, slp_tree root) |
208 | { | |
209 | if (!root) | |
c3a2bc6d | 210 | return PERM_UNKNOWN; |
3ed472af TC |
211 | |
212 | unsigned i; | |
c3a2bc6d | 213 | complex_perm_kinds_t *tmp; |
3ed472af TC |
214 | |
215 | if ((tmp = perm_cache->get (root)) != NULL) | |
216 | return *tmp; | |
217 | ||
c3a2bc6d | 218 | complex_perm_kinds_t retval = PERM_UNKNOWN; |
3ed472af TC |
219 | perm_cache->put (root, retval); |
220 | ||
221 | /* If it's a load node, then just read the load permute. */ | |
222 | if (SLP_TREE_LOAD_PERMUTATION (root).exists ()) | |
223 | { | |
c3a2bc6d | 224 | retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root)); |
3ed472af TC |
225 | perm_cache->put (root, retval); |
226 | return retval; | |
227 | } | |
228 | else if (SLP_TREE_DEF_TYPE (root) != vect_internal_def) | |
229 | { | |
c3a2bc6d | 230 | retval = PERM_TOP; |
bd4298e1 | 231 | perm_cache->put (root, retval); |
3ed472af TC |
232 | return retval; |
233 | } | |
234 | ||
3ed472af TC |
235 | complex_perm_kinds_t kind = PERM_TOP; |
236 | ||
237 | slp_tree child; | |
238 | FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, child) | |
239 | { | |
c3a2bc6d TC |
240 | complex_perm_kinds_t res = linear_loads_p (perm_cache, child); |
241 | kind = vect_merge_perms (kind, res); | |
159b0bd9 | 242 | /* Unknown and Top are not valid on blends as they produce no permute. */ |
c3a2bc6d | 243 | retval = kind; |
159b0bd9 | 244 | if (kind == PERM_UNKNOWN || kind == PERM_TOP) |
3ed472af | 245 | return retval; |
3ed472af TC |
246 | } |
247 | ||
c3a2bc6d | 248 | retval = kind; |
3ed472af TC |
249 | |
250 | perm_cache->put (root, retval); | |
251 | return retval; | |
252 | } | |
253 | ||
254 | ||
255 | /* This function attempts to make a node rooted in NODE is linear. If the node | |
256 | if already linear than the node itself is returned in RESULT. | |
257 | ||
258 | If the node is not linear then a new VEC_PERM_EXPR node is created with a | |
259 | lane permute that when applied will make the node linear. If such a | |
260 | permute cannot be created then FALSE is returned from the function. | |
261 | ||
262 | Here linearity is defined as having a sequential, monotically increasing | |
263 | load position inside the load permute generated by the loads reachable from | |
264 | NODE. */ | |
265 | ||
266 | static slp_tree | |
267 | vect_build_swap_evenodd_node (slp_tree node) | |
268 | { | |
269 | /* Attempt to linearise the permute. */ | |
270 | vec<std::pair<unsigned, unsigned> > zipped; | |
271 | zipped.create (SLP_TREE_LANES (node)); | |
272 | ||
273 | for (unsigned x = 0; x < SLP_TREE_LANES (node); x+=2) | |
274 | { | |
275 | zipped.quick_push (std::make_pair (0, x+1)); | |
276 | zipped.quick_push (std::make_pair (0, x)); | |
277 | } | |
278 | ||
279 | /* Create the new permute node and store it instead. */ | |
280 | slp_tree vnode = vect_create_new_slp_node (1, VEC_PERM_EXPR); | |
281 | SLP_TREE_LANE_PERMUTATION (vnode) = zipped; | |
282 | SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (node); | |
283 | SLP_TREE_CHILDREN (vnode).quick_push (node); | |
284 | SLP_TREE_REF_COUNT (vnode) = 1; | |
285 | SLP_TREE_LANES (vnode) = SLP_TREE_LANES (node); | |
286 | SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (node); | |
287 | SLP_TREE_REF_COUNT (node)++; | |
288 | return vnode; | |
289 | } | |
290 | ||
291 | /* Checks to see of the expression represented by NODE is a gimple assign with | |
292 | code CODE. */ | |
293 | ||
294 | static inline bool | |
295 | vect_match_expression_p (slp_tree node, tree_code code) | |
296 | { | |
297 | if (!node | |
298 | || !SLP_TREE_REPRESENTATIVE (node)) | |
299 | return false; | |
300 | ||
301 | gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node)); | |
302 | if (!is_gimple_assign (expr) | |
303 | || gimple_assign_rhs_code (expr) != code) | |
304 | return false; | |
305 | ||
306 | return true; | |
307 | } | |
308 | ||
31fac318 TC |
309 | /* Checks to see if the expression represented by NODE is a call to the internal |
310 | function FN. */ | |
311 | ||
312 | static inline bool | |
313 | vect_match_call_p (slp_tree node, internal_fn fn) | |
314 | { | |
315 | if (!node | |
316 | || !SLP_TREE_REPRESENTATIVE (node)) | |
317 | return false; | |
318 | ||
319 | gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node)); | |
320 | if (!expr | |
321 | || !gimple_call_internal_p (expr, fn)) | |
322 | return false; | |
323 | ||
324 | return true; | |
325 | } | |
326 | ||
3ed472af TC |
327 | /* Check if the given lane permute in PERMUTES matches an alternating sequence |
328 | of {even odd even odd ...}. This to account for unrolled loops. Further | |
329 | mode there resulting permute must be linear. */ | |
330 | ||
331 | static inline bool | |
332 | vect_check_evenodd_blend (lane_permutation_t &permutes, | |
333 | unsigned even, unsigned odd) | |
334 | { | |
9c68e2ab TC |
335 | if (permutes.length () == 0 |
336 | || permutes.length () % 2 != 0) | |
3ed472af TC |
337 | return false; |
338 | ||
339 | unsigned val[2] = {even, odd}; | |
340 | unsigned seed = 0; | |
341 | for (unsigned i = 0; i < permutes.length (); i++) | |
342 | if (permutes[i].first != val[i % 2] | |
343 | || permutes[i].second != seed++) | |
344 | return false; | |
345 | ||
346 | return true; | |
347 | } | |
348 | ||
349 | /* This function will match the two gimple expressions representing NODE1 and | |
350 | NODE2 in parallel and returns the pair operation that represents the two | |
351 | expressions in the two statements. | |
352 | ||
353 | If match is successful then the corresponding complex_operation is | |
354 | returned and the arguments to the two matched operations are returned in OPS. | |
355 | ||
356 | If TWO_OPERANDS it is expected that the LANES of the parent VEC_PERM select | |
357 | from the two nodes alternatingly. | |
358 | ||
359 | If unsuccessful then CMPLX_NONE is returned and OPS is untouched. | |
360 | ||
361 | e.g. the following gimple statements | |
362 | ||
363 | stmt 0 _39 = _37 + _12; | |
364 | stmt 1 _6 = _38 - _36; | |
365 | ||
366 | will return PLUS_MINUS along with OPS containing {_37, _12, _38, _36}. | |
367 | */ | |
368 | ||
369 | static complex_operation_t | |
370 | vect_detect_pair_op (slp_tree node1, slp_tree node2, lane_permutation_t &lanes, | |
371 | bool two_operands = true, vec<slp_tree> *ops = NULL) | |
372 | { | |
373 | complex_operation_t result = CMPLX_NONE; | |
374 | ||
375 | if (vect_match_expression_p (node1, MINUS_EXPR) | |
376 | && vect_match_expression_p (node2, PLUS_EXPR) | |
377 | && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1))) | |
378 | result = MINUS_PLUS; | |
379 | else if (vect_match_expression_p (node1, PLUS_EXPR) | |
380 | && vect_match_expression_p (node2, MINUS_EXPR) | |
381 | && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1))) | |
382 | result = PLUS_MINUS; | |
383 | else if (vect_match_expression_p (node1, PLUS_EXPR) | |
384 | && vect_match_expression_p (node2, PLUS_EXPR)) | |
385 | result = PLUS_PLUS; | |
386 | else if (vect_match_expression_p (node1, MULT_EXPR) | |
387 | && vect_match_expression_p (node2, MULT_EXPR)) | |
388 | result = MULT_MULT; | |
389 | ||
390 | if (result != CMPLX_NONE && ops != NULL) | |
391 | { | |
5296bd57 TC |
392 | ops->safe_push (node1); |
393 | ops->safe_push (node2); | |
3ed472af TC |
394 | } |
395 | return result; | |
396 | } | |
397 | ||
398 | /* Overload of vect_detect_pair_op that matches against the representative | |
399 | statements in the children of NODE. It is expected that NODE has exactly | |
400 | two children and when TWO_OPERANDS then NODE must be a VEC_PERM. */ | |
401 | ||
402 | static complex_operation_t | |
403 | vect_detect_pair_op (slp_tree node, bool two_operands = true, | |
404 | vec<slp_tree> *ops = NULL) | |
405 | { | |
406 | if (!two_operands && SLP_TREE_CODE (node) == VEC_PERM_EXPR) | |
407 | return CMPLX_NONE; | |
408 | ||
409 | if (SLP_TREE_CHILDREN (node).length () != 2) | |
410 | return CMPLX_NONE; | |
411 | ||
412 | vec<slp_tree> children = SLP_TREE_CHILDREN (node); | |
413 | lane_permutation_t &lanes = SLP_TREE_LANE_PERMUTATION (node); | |
414 | ||
415 | return vect_detect_pair_op (children[0], children[1], lanes, two_operands, | |
416 | ops); | |
417 | } | |
418 | ||
419 | /******************************************************************************* | |
420 | * complex_pattern class | |
421 | ******************************************************************************/ | |
422 | ||
423 | /* SLP Complex Numbers pattern matching. | |
424 | ||
425 | As an example, the following simple loop: | |
426 | ||
427 | double a[restrict N]; double b[restrict N]; double c[restrict N]; | |
428 | ||
429 | for (int i=0; i < N; i+=2) | |
430 | { | |
431 | c[i] = a[i] - b[i+1]; | |
432 | c[i+1] = a[i+1] + b[i]; | |
433 | } | |
434 | ||
435 | which represents a complex addition on with a rotation of 90* around the | |
436 | argand plane. i.e. if `a` and `b` were complex numbers then this would be the | |
437 | same as `a + (b * I)`. | |
438 | ||
439 | Here the expressions for `c[i]` and `c[i+1]` are independent but have to be | |
440 | both recognized in order for the pattern to work. As an SLP tree this is | |
441 | represented as | |
442 | ||
443 | +--------------------------------+ | |
444 | | stmt 0 *_9 = _10; | | |
445 | | stmt 1 *_15 = _16; | | |
446 | +--------------------------------+ | |
447 | | | |
448 | | | |
449 | v | |
450 | +--------------------------------+ | |
451 | | stmt 0 _10 = _4 - _8; | | |
452 | | stmt 1 _16 = _12 + _14; | | |
453 | | lane permutation { 0[0] 1[1] } | | |
454 | +--------------------------------+ | |
455 | | | | |
456 | | | | |
457 | | | | |
458 | +-----+ | | +-----+ | |
459 | | | | | | | | |
460 | +-----| { } |<-----+ +----->| { } --------+ | |
461 | | | | +------------------| | | | |
462 | | +-----+ | +-----+ | | |
463 | | | | | | |
464 | | | | | | |
465 | | +------|------------------+ | | |
466 | | | | | | |
467 | v v v v | |
468 | +--------------------------+ +--------------------------------+ | |
469 | | stmt 0 _8 = *_7; | | stmt 0 _4 = *_3; | | |
470 | | stmt 1 _14 = *_13; | | stmt 1 _12 = *_11; | | |
471 | | load permutation { 1 0 } | | load permutation { 0 1 } | | |
472 | +--------------------------+ +--------------------------------+ | |
473 | ||
474 | The pattern matcher allows you to replace both statements 0 and 1 or none at | |
475 | all. Because this operation is a two operands operation the actual nodes | |
476 | being replaced are those in the { } nodes. The actual scalar statements | |
477 | themselves are not replaced or used during the matching but instead the | |
478 | SLP_TREE_REPRESENTATIVE statements are inspected. You are also allowed to | |
479 | replace and match on any number of nodes. | |
480 | ||
481 | Because the pattern matcher matches on the representative statement for the | |
482 | SLP node the case of two_operators it allows you to match the children of the | |
483 | node. This is done using the method `recognize ()`. | |
484 | ||
485 | */ | |
486 | ||
487 | /* The complex_pattern class contains common code for pattern matchers that work | |
488 | on complex numbers. These provide functionality to allow de-construction and | |
489 | validation of sequences depicting/transforming REAL and IMAG pairs. */ | |
490 | ||
491 | class complex_pattern : public vect_pattern | |
492 | { | |
493 | protected: | |
494 | auto_vec<slp_tree> m_workset; | |
495 | complex_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn) | |
496 | : vect_pattern (node, m_ops, ifn) | |
497 | { | |
498 | this->m_workset.safe_push (*node); | |
499 | } | |
500 | ||
501 | public: | |
502 | void build (vec_info *); | |
503 | ||
504 | static internal_fn | |
0c18faac | 505 | matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *, |
3ed472af TC |
506 | vec<slp_tree> *); |
507 | }; | |
508 | ||
509 | /* Create a replacement pattern statement for each node in m_node and inserts | |
510 | the new statement into m_node as the new representative statement. The old | |
511 | statement is marked as being in a pattern defined by the new statement. The | |
512 | statement is created as call to internal function IFN with m_num_args | |
513 | arguments. | |
514 | ||
515 | Futhermore the new pattern is also added to the vectorization information | |
516 | structure VINFO and the old statement STMT_INFO is marked as unused while | |
517 | the new statement is marked as used and the number of SLP uses of the new | |
518 | statement is incremented. | |
519 | ||
520 | The newly created SLP nodes are marked as SLP only and will be dissolved | |
521 | if SLP is aborted. | |
522 | ||
523 | The newly created gimple call is returned and the BB remains unchanged. | |
524 | ||
525 | This default method is designed to only match against simple operands where | |
526 | all the input and output types are the same. | |
527 | */ | |
528 | ||
529 | void | |
530 | complex_pattern::build (vec_info *vinfo) | |
531 | { | |
532 | stmt_vec_info stmt_info; | |
533 | ||
534 | auto_vec<tree> args; | |
535 | args.create (this->m_num_args); | |
536 | args.quick_grow_cleared (this->m_num_args); | |
537 | slp_tree node; | |
538 | unsigned ix; | |
539 | stmt_vec_info call_stmt_info; | |
540 | gcall *call_stmt = NULL; | |
541 | ||
542 | /* Now modify the nodes themselves. */ | |
543 | FOR_EACH_VEC_ELT (this->m_workset, ix, node) | |
544 | { | |
545 | /* Calculate the location of the statement in NODE to replace. */ | |
546 | stmt_info = SLP_TREE_REPRESENTATIVE (node); | |
374f93da RB |
547 | stmt_vec_info reduc_def |
548 | = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)); | |
3ed472af TC |
549 | gimple* old_stmt = STMT_VINFO_STMT (stmt_info); |
550 | tree lhs_old_stmt = gimple_get_lhs (old_stmt); | |
551 | tree type = TREE_TYPE (lhs_old_stmt); | |
552 | ||
553 | /* Create the argument set for use by gimple_build_call_internal_vec. */ | |
554 | for (unsigned i = 0; i < this->m_num_args; i++) | |
555 | args[i] = lhs_old_stmt; | |
556 | ||
557 | /* Create the new pattern statements. */ | |
558 | call_stmt = gimple_build_call_internal_vec (this->m_ifn, args); | |
559 | tree var = make_temp_ssa_name (type, call_stmt, "slp_patt"); | |
560 | gimple_call_set_lhs (call_stmt, var); | |
561 | gimple_set_location (call_stmt, gimple_location (old_stmt)); | |
562 | gimple_call_set_nothrow (call_stmt, true); | |
563 | ||
564 | /* Adjust the book-keeping for the new and old statements for use during | |
565 | SLP. This is required to get the right VF and statement during SLP | |
566 | analysis. These changes are created after relevancy has been set for | |
567 | the nodes as such we need to manually update them. Any changes will be | |
568 | undone if SLP is cancelled. */ | |
569 | call_stmt_info | |
570 | = vinfo->add_pattern_stmt (call_stmt, stmt_info); | |
571 | ||
572 | /* Make sure to mark the representative statement pure_slp and | |
374f93da | 573 | relevant and transfer reduction info. */ |
3ed472af TC |
574 | STMT_VINFO_RELEVANT (call_stmt_info) = vect_used_in_scope; |
575 | STMT_SLP_TYPE (call_stmt_info) = pure_slp; | |
374f93da | 576 | STMT_VINFO_REDUC_DEF (call_stmt_info) = reduc_def; |
3ed472af | 577 | |
a29124d2 RB |
578 | gimple_set_bb (call_stmt, gimple_bb (stmt_info->stmt)); |
579 | STMT_VINFO_VECTYPE (call_stmt_info) = SLP_TREE_VECTYPE (node); | |
5e606ed9 | 580 | STMT_VINFO_SLP_VECT_ONLY_PATTERN (call_stmt_info) = true; |
3ed472af TC |
581 | |
582 | /* Since we are replacing all the statements in the group with the same | |
583 | thing it doesn't really matter. So just set it every time a new stmt | |
584 | is created. */ | |
585 | SLP_TREE_REPRESENTATIVE (node) = call_stmt_info; | |
586 | SLP_TREE_LANE_PERMUTATION (node).release (); | |
587 | SLP_TREE_CODE (node) = CALL_EXPR; | |
588 | } | |
589 | } | |
590 | ||
591 | /******************************************************************************* | |
592 | * complex_add_pattern class | |
593 | ******************************************************************************/ | |
594 | ||
595 | class complex_add_pattern : public complex_pattern | |
596 | { | |
597 | protected: | |
598 | complex_add_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn) | |
599 | : complex_pattern (node, m_ops, ifn) | |
600 | { | |
601 | this->m_num_args = 2; | |
602 | } | |
603 | ||
604 | public: | |
605 | void build (vec_info *); | |
606 | static internal_fn | |
0c18faac | 607 | matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *, |
3ed472af TC |
608 | vec<slp_tree> *); |
609 | ||
610 | static vect_pattern* | |
611 | recognize (slp_tree_to_load_perm_map_t *, slp_tree *); | |
0c18faac TC |
612 | |
613 | static vect_pattern* | |
614 | mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn) | |
615 | { | |
616 | return new complex_add_pattern (node, m_ops, ifn); | |
617 | } | |
3ed472af TC |
618 | }; |
619 | ||
620 | /* Perform a replacement of the detected complex add pattern with the new | |
621 | instruction sequences. */ | |
622 | ||
623 | void | |
624 | complex_add_pattern::build (vec_info *vinfo) | |
625 | { | |
fe701195 TC |
626 | SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2); |
627 | ||
3ed472af TC |
628 | slp_tree node = this->m_ops[0]; |
629 | vec<slp_tree> children = SLP_TREE_CHILDREN (node); | |
630 | ||
631 | /* First re-arrange the children. */ | |
fe701195 TC |
632 | SLP_TREE_CHILDREN (*this->m_node)[0] = children[0]; |
633 | SLP_TREE_CHILDREN (*this->m_node)[1] = | |
634 | vect_build_swap_evenodd_node (children[1]); | |
3ed472af | 635 | |
fe701195 TC |
636 | SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[0])++; |
637 | SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[1])++; | |
0c18faac TC |
638 | vect_free_slp_tree (this->m_ops[0]); |
639 | vect_free_slp_tree (this->m_ops[1]); | |
640 | ||
3ed472af TC |
641 | complex_pattern::build (vinfo); |
642 | } | |
643 | ||
644 | /* Pattern matcher for trying to match complex addition pattern in SLP tree. | |
645 | ||
646 | If no match is found then IFN is set to IFN_LAST. | |
647 | This function matches the patterns shaped as: | |
648 | ||
649 | c[i] = a[i] - b[i+1]; | |
650 | c[i+1] = a[i+1] + b[i]; | |
651 | ||
652 | If a match occurred then TRUE is returned, else FALSE. The initial match is | |
653 | expected to be in OP1 and the initial match operands in args0. */ | |
654 | ||
655 | internal_fn | |
656 | complex_add_pattern::matches (complex_operation_t op, | |
657 | slp_tree_to_load_perm_map_t *perm_cache, | |
0c18faac | 658 | slp_tree *node, vec<slp_tree> *ops) |
3ed472af TC |
659 | { |
660 | internal_fn ifn = IFN_LAST; | |
661 | ||
662 | /* Find the two components. Rotation in the complex plane will modify | |
663 | the operations: | |
664 | ||
665 | * Rotation 0: + + | |
666 | * Rotation 90: - + | |
667 | * Rotation 180: - - | |
668 | * Rotation 270: + - | |
669 | ||
670 | Rotation 0 and 180 can be handled by normal SIMD code, so we don't need | |
671 | to care about them here. */ | |
672 | if (op == MINUS_PLUS) | |
673 | ifn = IFN_COMPLEX_ADD_ROT90; | |
674 | else if (op == PLUS_MINUS) | |
675 | ifn = IFN_COMPLEX_ADD_ROT270; | |
676 | else | |
677 | return ifn; | |
678 | ||
679 | /* verify that there is a permute, otherwise this isn't a pattern we | |
680 | we support. */ | |
681 | gcc_assert (ops->length () == 2); | |
682 | ||
683 | vec<slp_tree> children = SLP_TREE_CHILDREN ((*ops)[0]); | |
684 | ||
685 | /* First node must be unpermuted. */ | |
c3a2bc6d | 686 | if (linear_loads_p (perm_cache, children[0]) != PERM_EVENODD) |
3ed472af TC |
687 | return IFN_LAST; |
688 | ||
689 | /* Second node must be permuted. */ | |
c3a2bc6d | 690 | if (linear_loads_p (perm_cache, children[1]) != PERM_ODDEVEN) |
3ed472af TC |
691 | return IFN_LAST; |
692 | ||
0c18faac TC |
693 | if (!vect_pattern_validate_optab (ifn, *node)) |
694 | return IFN_LAST; | |
695 | ||
3ed472af TC |
696 | return ifn; |
697 | } | |
698 | ||
699 | /* Attempt to recognize a complex add pattern. */ | |
700 | ||
701 | vect_pattern* | |
702 | complex_add_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache, | |
703 | slp_tree *node) | |
704 | { | |
705 | auto_vec<slp_tree> ops; | |
706 | complex_operation_t op | |
707 | = vect_detect_pair_op (*node, true, &ops); | |
0c18faac TC |
708 | internal_fn ifn |
709 | = complex_add_pattern::matches (op, perm_cache, node, &ops); | |
710 | if (ifn == IFN_LAST) | |
3ed472af TC |
711 | return NULL; |
712 | ||
713 | return new complex_add_pattern (node, &ops, ifn); | |
714 | } | |
715 | ||
e09173d8 TC |
716 | /******************************************************************************* |
717 | * complex_mul_pattern | |
718 | ******************************************************************************/ | |
719 | ||
720 | /* Helper function of that looks for a match in the CHILDth child of NODE. The | |
721 | child used is stored in RES. | |
722 | ||
723 | If the match is successful then ARGS will contain the operands matched | |
724 | and the complex_operation_t type is returned. If match is not successful | |
725 | then CMPLX_NONE is returned and ARGS is left unmodified. */ | |
726 | ||
727 | static inline complex_operation_t | |
728 | vect_match_call_complex_mla (slp_tree node, unsigned child, | |
729 | vec<slp_tree> *args = NULL, slp_tree *res = NULL) | |
730 | { | |
731 | gcc_assert (child < SLP_TREE_CHILDREN (node).length ()); | |
732 | ||
733 | slp_tree data = SLP_TREE_CHILDREN (node)[child]; | |
734 | ||
735 | if (res) | |
736 | *res = data; | |
737 | ||
738 | return vect_detect_pair_op (data, false, args); | |
739 | } | |
740 | ||
741 | /* Check to see if either of the trees in ARGS are a NEGATE_EXPR. If the first | |
742 | child (args[0]) is a NEGATE_EXPR then NEG_FIRST_P is set to TRUE. | |
743 | ||
744 | If a negate is found then the values in ARGS are reordered such that the | |
745 | negate node is always the second one and the entry is replaced by the child | |
746 | of the negate node. */ | |
747 | ||
748 | static inline bool | |
a3d3e8c3 | 749 | vect_normalize_conj_loc (vec<slp_tree> &args, bool *neg_first_p = NULL) |
e09173d8 TC |
750 | { |
751 | gcc_assert (args.length () == 2); | |
752 | bool neg_found = false; | |
753 | ||
754 | if (vect_match_expression_p (args[0], NEGATE_EXPR)) | |
755 | { | |
756 | std::swap (args[0], args[1]); | |
757 | neg_found = true; | |
758 | if (neg_first_p) | |
759 | *neg_first_p = true; | |
760 | } | |
761 | else if (vect_match_expression_p (args[1], NEGATE_EXPR)) | |
762 | { | |
763 | neg_found = true; | |
764 | if (neg_first_p) | |
765 | *neg_first_p = false; | |
766 | } | |
767 | ||
768 | if (neg_found) | |
769 | args[1] = SLP_TREE_CHILDREN (args[1])[0]; | |
770 | ||
771 | return neg_found; | |
772 | } | |
773 | ||
774 | /* Helper function to check if PERM is KIND or PERM_TOP. */ | |
775 | ||
776 | static inline bool | |
c3a2bc6d | 777 | is_eq_or_top (complex_perm_kinds_t perm, complex_perm_kinds_t kind) |
e09173d8 | 778 | { |
c3a2bc6d | 779 | return perm == kind || perm == PERM_TOP; |
e09173d8 TC |
780 | } |
781 | ||
782 | /* Helper function that checks to see if LEFT_OP and RIGHT_OP are both MULT_EXPR | |
783 | nodes but also that they represent an operation that is either a complex | |
784 | multiplication or a complex multiplication by conjugated value. | |
785 | ||
786 | Of the negation is expected to be in the first half of the tree (As required | |
787 | by an FMS pattern) then NEG_FIRST is true. If the operation is a conjugate | |
788 | operation then CONJ_FIRST_OPERAND is set to indicate whether the first or | |
789 | second operand contains the conjugate operation. */ | |
790 | ||
791 | static inline bool | |
792 | vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache, | |
00dcc88a MS |
793 | const vec<slp_tree> &left_op, |
794 | const vec<slp_tree> &right_op, | |
e09173d8 TC |
795 | bool neg_first, bool *conj_first_operand, |
796 | bool fms) | |
797 | { | |
798 | /* The presence of a negation indicates that we have either a conjugate or a | |
799 | rotation. We need to distinguish which one. */ | |
800 | *conj_first_operand = false; | |
801 | complex_perm_kinds_t kind; | |
802 | ||
803 | /* Complex conjugates have the negation on the imaginary part of the | |
804 | number where rotations affect the real component. So check if the | |
805 | negation is on a dup of lane 1. */ | |
806 | if (fms) | |
807 | { | |
808 | /* Canonicalization for fms is not consistent. So have to test both | |
809 | variants to be sure. This needs to be fixed in the mid-end so | |
810 | this part can be simpler. */ | |
c3a2bc6d | 811 | kind = linear_loads_p (perm_cache, right_op[0]); |
478e571a | 812 | if (!((is_eq_or_top (linear_loads_p (perm_cache, right_op[0]), PERM_ODDODD) |
e09173d8 TC |
813 | && is_eq_or_top (linear_loads_p (perm_cache, right_op[1]), |
814 | PERM_ODDEVEN)) | |
815 | || (kind == PERM_ODDEVEN | |
816 | && is_eq_or_top (linear_loads_p (perm_cache, right_op[1]), | |
817 | PERM_ODDODD)))) | |
818 | return false; | |
819 | } | |
820 | else | |
821 | { | |
c3a2bc6d | 822 | if (linear_loads_p (perm_cache, right_op[1]) != PERM_ODDODD |
e09173d8 TC |
823 | && !is_eq_or_top (linear_loads_p (perm_cache, right_op[0]), |
824 | PERM_ODDEVEN)) | |
825 | return false; | |
826 | } | |
827 | ||
828 | /* Deal with differences in indexes. */ | |
829 | int index1 = fms ? 1 : 0; | |
830 | int index2 = fms ? 0 : 1; | |
831 | ||
832 | /* Check if the conjugate is on the second first or second operand. The | |
833 | order of the node with the conjugate value determines this, and the dup | |
834 | node must be one of lane 0 of the same DR as the neg node. */ | |
c3a2bc6d | 835 | kind = linear_loads_p (perm_cache, left_op[index1]); |
e09173d8 TC |
836 | if (kind == PERM_TOP) |
837 | { | |
c3a2bc6d | 838 | if (linear_loads_p (perm_cache, left_op[index2]) == PERM_EVENODD) |
e09173d8 TC |
839 | return true; |
840 | } | |
841 | else if (kind == PERM_EVENODD) | |
842 | { | |
c3a2bc6d | 843 | if ((kind = linear_loads_p (perm_cache, left_op[index2])) == PERM_EVENODD) |
e09173d8 | 844 | return false; |
478e571a | 845 | return true; |
e09173d8 TC |
846 | } |
847 | else if (!neg_first) | |
848 | *conj_first_operand = true; | |
849 | else | |
850 | return false; | |
851 | ||
852 | if (kind != PERM_EVENEVEN) | |
853 | return false; | |
854 | ||
855 | return true; | |
856 | } | |
857 | ||
858 | /* Helper function to help distinguish between a conjugate and a rotation in a | |
859 | complex multiplication. The operations have similar shapes but the order of | |
860 | the load permutes are different. This function returns TRUE when the order | |
861 | is consistent with a multiplication or multiplication by conjugated | |
862 | operand but returns FALSE if it's a multiplication by rotated operand. */ | |
863 | ||
864 | static inline bool | |
865 | vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache, | |
00dcc88a MS |
866 | const vec<slp_tree> &op, |
867 | complex_perm_kinds_t permKind) | |
e09173d8 TC |
868 | { |
869 | /* The left node is the more common case, test it first. */ | |
870 | if (!is_eq_or_top (linear_loads_p (perm_cache, op[0]), permKind)) | |
871 | { | |
872 | if (!is_eq_or_top (linear_loads_p (perm_cache, op[1]), permKind)) | |
873 | return false; | |
874 | } | |
875 | return true; | |
876 | } | |
877 | ||
878 | /* This function combines two nodes containing only even and only odd lanes | |
879 | together into a single node which contains the nodes in even/odd order | |
880 | by using a lane permute. | |
881 | ||
882 | The lanes in EVEN and ODD are duplicated 2 times inside the vectors. | |
883 | So for a lanes = 4 EVEN contains {EVEN1, EVEN1, EVEN2, EVEN2}. | |
884 | ||
885 | The tree REPRESENTATION is taken from the supplied REP along with the | |
886 | vectype which must be the same between all three nodes. | |
887 | */ | |
888 | ||
889 | static slp_tree | |
890 | vect_build_combine_node (slp_tree even, slp_tree odd, slp_tree rep) | |
891 | { | |
892 | vec<std::pair<unsigned, unsigned> > perm; | |
893 | perm.create (SLP_TREE_LANES (rep)); | |
894 | ||
895 | for (unsigned x = 0; x < SLP_TREE_LANES (rep); x+=2) | |
896 | { | |
897 | perm.quick_push (std::make_pair (0, x)); | |
898 | perm.quick_push (std::make_pair (1, x+1)); | |
899 | } | |
900 | ||
901 | slp_tree vnode = vect_create_new_slp_node (2, SLP_TREE_CODE (even)); | |
902 | SLP_TREE_CODE (vnode) = VEC_PERM_EXPR; | |
903 | SLP_TREE_LANE_PERMUTATION (vnode) = perm; | |
904 | ||
905 | SLP_TREE_CHILDREN (vnode).create (2); | |
906 | SLP_TREE_CHILDREN (vnode).quick_push (even); | |
907 | SLP_TREE_CHILDREN (vnode).quick_push (odd); | |
908 | SLP_TREE_REF_COUNT (even)++; | |
909 | SLP_TREE_REF_COUNT (odd)++; | |
910 | SLP_TREE_REF_COUNT (vnode) = 1; | |
911 | ||
912 | SLP_TREE_LANES (vnode) = SLP_TREE_LANES (rep); | |
913 | gcc_assert (perm.length () == SLP_TREE_LANES (vnode)); | |
914 | /* Representation is set to that of the current node as the vectorizer | |
915 | can't deal with VEC_PERMs with no representation, as would be the | |
916 | case with invariants. */ | |
917 | SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (rep); | |
918 | SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (rep); | |
919 | return vnode; | |
920 | } | |
921 | ||
922 | class complex_mul_pattern : public complex_pattern | |
923 | { | |
924 | protected: | |
925 | complex_mul_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn) | |
926 | : complex_pattern (node, m_ops, ifn) | |
927 | { | |
928 | this->m_num_args = 2; | |
929 | } | |
930 | ||
931 | public: | |
932 | void build (vec_info *); | |
933 | static internal_fn | |
934 | matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *, | |
935 | vec<slp_tree> *); | |
936 | ||
937 | static vect_pattern* | |
938 | recognize (slp_tree_to_load_perm_map_t *, slp_tree *); | |
939 | ||
940 | static vect_pattern* | |
941 | mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn) | |
942 | { | |
943 | return new complex_mul_pattern (node, m_ops, ifn); | |
944 | } | |
945 | ||
946 | }; | |
947 | ||
948 | /* Pattern matcher for trying to match complex multiply pattern in SLP tree | |
949 | If the operation matches then IFN is set to the operation it matched | |
950 | and the arguments to the two replacement statements are put in m_ops. | |
951 | ||
952 | If no match is found then IFN is set to IFN_LAST and m_ops is unchanged. | |
953 | ||
954 | This function matches the patterns shaped as: | |
955 | ||
956 | double ax = (b[i+1] * a[i]); | |
957 | double bx = (a[i+1] * b[i]); | |
958 | ||
959 | c[i] = c[i] - ax; | |
960 | c[i+1] = c[i+1] + bx; | |
961 | ||
962 | If a match occurred then TRUE is returned, else FALSE. The initial match is | |
963 | expected to be in OP1 and the initial match operands in args0. */ | |
964 | ||
965 | internal_fn | |
966 | complex_mul_pattern::matches (complex_operation_t op, | |
967 | slp_tree_to_load_perm_map_t *perm_cache, | |
968 | slp_tree *node, vec<slp_tree> *ops) | |
969 | { | |
970 | internal_fn ifn = IFN_LAST; | |
971 | ||
972 | if (op != MINUS_PLUS) | |
973 | return IFN_LAST; | |
974 | ||
975 | slp_tree root = *node; | |
976 | /* First two nodes must be a multiply. */ | |
977 | auto_vec<slp_tree> muls; | |
978 | if (vect_match_call_complex_mla (root, 0) != MULT_MULT | |
979 | || vect_match_call_complex_mla (root, 1, &muls) != MULT_MULT) | |
980 | return IFN_LAST; | |
981 | ||
982 | /* Now operand2+4 may lead to another expression. */ | |
983 | auto_vec<slp_tree> left_op, right_op; | |
984 | left_op.safe_splice (SLP_TREE_CHILDREN (muls[0])); | |
985 | right_op.safe_splice (SLP_TREE_CHILDREN (muls[1])); | |
986 | ||
c3a2bc6d | 987 | if (linear_loads_p (perm_cache, left_op[1]) == PERM_ODDEVEN) |
e09173d8 TC |
988 | return IFN_LAST; |
989 | ||
990 | bool neg_first = false; | |
991 | bool conj_first_operand = false; | |
992 | bool is_neg = vect_normalize_conj_loc (right_op, &neg_first); | |
993 | ||
994 | if (!is_neg) | |
995 | { | |
996 | /* A multiplication needs to multiply agains the real pair, otherwise | |
997 | the pattern matches that of FMS. */ | |
998 | if (!vect_validate_multiplication (perm_cache, left_op, PERM_EVENEVEN) | |
999 | || vect_normalize_conj_loc (left_op)) | |
1000 | return IFN_LAST; | |
1001 | ifn = IFN_COMPLEX_MUL; | |
1002 | } | |
1003 | else if (is_neg) | |
1004 | { | |
1005 | if (!vect_validate_multiplication (perm_cache, left_op, right_op, | |
1006 | neg_first, &conj_first_operand, | |
1007 | false)) | |
1008 | return IFN_LAST; | |
1009 | ||
1010 | ifn = IFN_COMPLEX_MUL_CONJ; | |
1011 | } | |
1012 | ||
1013 | if (!vect_pattern_validate_optab (ifn, *node)) | |
1014 | return IFN_LAST; | |
1015 | ||
1016 | ops->truncate (0); | |
1017 | ops->create (3); | |
1018 | ||
c3a2bc6d | 1019 | complex_perm_kinds_t kind = linear_loads_p (perm_cache, left_op[0]); |
e09173d8 TC |
1020 | if (kind == PERM_EVENODD) |
1021 | { | |
1022 | ops->quick_push (left_op[1]); | |
1023 | ops->quick_push (right_op[1]); | |
1024 | ops->quick_push (left_op[0]); | |
1025 | } | |
1026 | else if (kind == PERM_TOP) | |
1027 | { | |
1028 | ops->quick_push (left_op[1]); | |
1029 | ops->quick_push (right_op[1]); | |
1030 | ops->quick_push (left_op[0]); | |
1031 | } | |
1032 | else if (kind == PERM_EVENEVEN && !conj_first_operand) | |
1033 | { | |
1034 | ops->quick_push (left_op[0]); | |
1035 | ops->quick_push (right_op[0]); | |
1036 | ops->quick_push (left_op[1]); | |
1037 | } | |
1038 | else | |
1039 | { | |
1040 | ops->quick_push (left_op[0]); | |
1041 | ops->quick_push (right_op[1]); | |
1042 | ops->quick_push (left_op[1]); | |
1043 | } | |
1044 | ||
1045 | return ifn; | |
1046 | } | |
1047 | ||
1048 | /* Attempt to recognize a complex mul pattern. */ | |
1049 | ||
1050 | vect_pattern* | |
1051 | complex_mul_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache, | |
1052 | slp_tree *node) | |
1053 | { | |
1054 | auto_vec<slp_tree> ops; | |
1055 | complex_operation_t op | |
1056 | = vect_detect_pair_op (*node, true, &ops); | |
1057 | internal_fn ifn | |
1058 | = complex_mul_pattern::matches (op, perm_cache, node, &ops); | |
1059 | if (ifn == IFN_LAST) | |
1060 | return NULL; | |
1061 | ||
1062 | return new complex_mul_pattern (node, &ops, ifn); | |
1063 | } | |
1064 | ||
1065 | /* Perform a replacement of the detected complex mul pattern with the new | |
1066 | instruction sequences. */ | |
1067 | ||
1068 | void | |
1069 | complex_mul_pattern::build (vec_info *vinfo) | |
1070 | { | |
1071 | slp_tree node; | |
1072 | unsigned i; | |
5296bd57 TC |
1073 | slp_tree newnode |
1074 | = vect_build_combine_node (this->m_ops[0], this->m_ops[1], *this->m_node); | |
1075 | SLP_TREE_REF_COUNT (this->m_ops[2])++; | |
1076 | ||
e09173d8 TC |
1077 | FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node) |
1078 | vect_free_slp_tree (node); | |
1079 | ||
1080 | /* First re-arrange the children. */ | |
1081 | SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2); | |
1082 | SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[2]; | |
5296bd57 | 1083 | SLP_TREE_CHILDREN (*this->m_node)[1] = newnode; |
e09173d8 TC |
1084 | |
1085 | /* And then rewrite the node itself. */ | |
1086 | complex_pattern::build (vinfo); | |
1087 | } | |
1088 | ||
31fac318 TC |
1089 | /******************************************************************************* |
1090 | * complex_fma_pattern class | |
1091 | ******************************************************************************/ | |
1092 | ||
1093 | class complex_fma_pattern : public complex_pattern | |
1094 | { | |
1095 | protected: | |
1096 | complex_fma_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn) | |
1097 | : complex_pattern (node, m_ops, ifn) | |
1098 | { | |
1099 | this->m_num_args = 3; | |
1100 | } | |
1101 | ||
1102 | public: | |
1103 | void build (vec_info *); | |
1104 | static internal_fn | |
1105 | matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *, | |
1106 | vec<slp_tree> *); | |
1107 | ||
1108 | static vect_pattern* | |
1109 | recognize (slp_tree_to_load_perm_map_t *, slp_tree *); | |
1110 | ||
1111 | static vect_pattern* | |
1112 | mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn) | |
1113 | { | |
1114 | return new complex_fma_pattern (node, m_ops, ifn); | |
1115 | } | |
1116 | }; | |
1117 | ||
31fac318 TC |
1118 | /* Pattern matcher for trying to match complex multiply and accumulate |
1119 | and multiply and subtract patterns in SLP tree. | |
1120 | If the operation matches then IFN is set to the operation it matched and | |
1121 | the arguments to the two replacement statements are put in m_ops. | |
1122 | ||
1123 | If no match is found then IFN is set to IFN_LAST and m_ops is unchanged. | |
1124 | ||
1125 | This function matches the patterns shaped as: | |
1126 | ||
1127 | double ax = (b[i+1] * a[i]) + (b[i] * a[i]); | |
1128 | double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]); | |
1129 | ||
1130 | c[i] = c[i] - ax; | |
1131 | c[i+1] = c[i+1] + bx; | |
1132 | ||
1133 | If a match occurred then TRUE is returned, else FALSE. The match is | |
1134 | performed after COMPLEX_MUL which would have done the majority of the work. | |
1135 | This function merely matches an ADD with a COMPLEX_MUL IFN. The initial | |
1136 | match is expected to be in OP1 and the initial match operands in args0. */ | |
1137 | ||
1138 | internal_fn | |
1139 | complex_fma_pattern::matches (complex_operation_t op, | |
1140 | slp_tree_to_load_perm_map_t * /* perm_cache */, | |
1141 | slp_tree *ref_node, vec<slp_tree> *ops) | |
1142 | { | |
1143 | internal_fn ifn = IFN_LAST; | |
1144 | ||
1145 | /* Find the two components. We match Complex MUL first which reduces the | |
1146 | amount of work this pattern has to do. After that we just match the | |
1147 | head node and we're done.: | |
1148 | ||
1149 | * FMA: + +. | |
1150 | ||
1151 | We need to ignore the two_operands nodes that may also match. | |
1152 | For that we can check if they have any scalar statements and also | |
1153 | check that it's not a permute node as we're looking for a normal | |
1154 | PLUS_EXPR operation. */ | |
1155 | if (op != CMPLX_NONE) | |
1156 | return IFN_LAST; | |
1157 | ||
1158 | /* Find the two components. We match Complex MUL first which reduces the | |
1159 | amount of work this pattern has to do. After that we just match the | |
1160 | head node and we're done.: | |
1161 | ||
1162 | * FMA: + + on a non-two_operands node. */ | |
1163 | slp_tree vnode = *ref_node; | |
1164 | if (SLP_TREE_LANE_PERMUTATION (vnode).exists () | |
1165 | || !SLP_TREE_CHILDREN (vnode).exists () | |
1166 | || !vect_match_expression_p (vnode, PLUS_EXPR)) | |
1167 | return IFN_LAST; | |
1168 | ||
1169 | slp_tree node = SLP_TREE_CHILDREN (vnode)[1]; | |
1170 | ||
1171 | if (vect_match_call_p (node, IFN_COMPLEX_MUL)) | |
1172 | ifn = IFN_COMPLEX_FMA; | |
1173 | else if (vect_match_call_p (node, IFN_COMPLEX_MUL_CONJ)) | |
1174 | ifn = IFN_COMPLEX_FMA_CONJ; | |
1175 | else | |
1176 | return IFN_LAST; | |
1177 | ||
1178 | if (!vect_pattern_validate_optab (ifn, vnode)) | |
1179 | return IFN_LAST; | |
1180 | ||
31fac318 TC |
1181 | ops->truncate (0); |
1182 | ops->create (3); | |
1183 | ||
1184 | if (ifn == IFN_COMPLEX_FMA) | |
1185 | { | |
1186 | ops->quick_push (SLP_TREE_CHILDREN (vnode)[0]); | |
1187 | ops->quick_push (SLP_TREE_CHILDREN (node)[1]); | |
1188 | ops->quick_push (SLP_TREE_CHILDREN (node)[0]); | |
1189 | } | |
1190 | else | |
1191 | { | |
1192 | ops->quick_push (SLP_TREE_CHILDREN (vnode)[0]); | |
1193 | ops->quick_push (SLP_TREE_CHILDREN (node)[0]); | |
1194 | ops->quick_push (SLP_TREE_CHILDREN (node)[1]); | |
1195 | } | |
1196 | ||
1197 | return ifn; | |
1198 | } | |
1199 | ||
1200 | /* Attempt to recognize a complex mul pattern. */ | |
1201 | ||
1202 | vect_pattern* | |
1203 | complex_fma_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache, | |
1204 | slp_tree *node) | |
1205 | { | |
1206 | auto_vec<slp_tree> ops; | |
1207 | complex_operation_t op | |
1208 | = vect_detect_pair_op (*node, true, &ops); | |
1209 | internal_fn ifn | |
1210 | = complex_fma_pattern::matches (op, perm_cache, node, &ops); | |
1211 | if (ifn == IFN_LAST) | |
1212 | return NULL; | |
1213 | ||
1214 | return new complex_fma_pattern (node, &ops, ifn); | |
1215 | } | |
1216 | ||
1217 | /* Perform a replacement of the detected complex mul pattern with the new | |
1218 | instruction sequences. */ | |
1219 | ||
1220 | void | |
1221 | complex_fma_pattern::build (vec_info *vinfo) | |
1222 | { | |
5296bd57 TC |
1223 | slp_tree node = SLP_TREE_CHILDREN (*this->m_node)[1]; |
1224 | ||
31fac318 TC |
1225 | SLP_TREE_CHILDREN (*this->m_node).release (); |
1226 | SLP_TREE_CHILDREN (*this->m_node).create (3); | |
1227 | SLP_TREE_CHILDREN (*this->m_node).safe_splice (this->m_ops); | |
1228 | ||
5296bd57 TC |
1229 | SLP_TREE_REF_COUNT (this->m_ops[1])++; |
1230 | SLP_TREE_REF_COUNT (this->m_ops[2])++; | |
1231 | ||
1232 | vect_free_slp_tree (node); | |
1233 | ||
31fac318 TC |
1234 | complex_pattern::build (vinfo); |
1235 | } | |
1236 | ||
478e571a TC |
1237 | /******************************************************************************* |
1238 | * complex_fms_pattern class | |
1239 | ******************************************************************************/ | |
1240 | ||
1241 | class complex_fms_pattern : public complex_pattern | |
1242 | { | |
1243 | protected: | |
1244 | complex_fms_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn) | |
1245 | : complex_pattern (node, m_ops, ifn) | |
1246 | { | |
1247 | this->m_num_args = 3; | |
1248 | } | |
1249 | ||
1250 | public: | |
1251 | void build (vec_info *); | |
1252 | static internal_fn | |
1253 | matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *, | |
1254 | vec<slp_tree> *); | |
1255 | ||
1256 | static vect_pattern* | |
1257 | recognize (slp_tree_to_load_perm_map_t *, slp_tree *); | |
1258 | ||
1259 | static vect_pattern* | |
1260 | mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn) | |
1261 | { | |
1262 | return new complex_fms_pattern (node, m_ops, ifn); | |
1263 | } | |
1264 | }; | |
1265 | ||
1266 | ||
1267 | /* Pattern matcher for trying to match complex multiply and accumulate | |
1268 | and multiply and subtract patterns in SLP tree. | |
1269 | If the operation matches then IFN is set to the operation it matched and | |
1270 | the arguments to the two replacement statements are put in m_ops. | |
1271 | ||
1272 | If no match is found then IFN is set to IFN_LAST and m_ops is unchanged. | |
1273 | ||
1274 | This function matches the patterns shaped as: | |
1275 | ||
1276 | double ax = (b[i+1] * a[i]) + (b[i] * a[i]); | |
1277 | double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]); | |
1278 | ||
1279 | c[i] = c[i] - ax; | |
1280 | c[i+1] = c[i+1] + bx; | |
1281 | ||
1282 | If a match occurred then TRUE is returned, else FALSE. The initial match is | |
1283 | expected to be in OP1 and the initial match operands in args0. */ | |
1284 | ||
1285 | internal_fn | |
1286 | complex_fms_pattern::matches (complex_operation_t op, | |
1287 | slp_tree_to_load_perm_map_t *perm_cache, | |
1288 | slp_tree * ref_node, vec<slp_tree> *ops) | |
1289 | { | |
1290 | internal_fn ifn = IFN_LAST; | |
1291 | ||
1292 | /* Find the two components. We match Complex MUL first which reduces the | |
1293 | amount of work this pattern has to do. After that we just match the | |
1294 | head node and we're done.: | |
1295 | ||
1296 | * FMS: - +. */ | |
1297 | slp_tree child = NULL; | |
1298 | ||
1299 | /* We need to ignore the two_operands nodes that may also match, | |
1300 | for that we can check if they have any scalar statements and also | |
1301 | check that it's not a permute node as we're looking for a normal | |
1302 | PLUS_EXPR operation. */ | |
1303 | if (op != PLUS_MINUS) | |
1304 | return IFN_LAST; | |
1305 | ||
1306 | child = SLP_TREE_CHILDREN ((*ops)[1])[1]; | |
1307 | if (vect_detect_pair_op (child) != MINUS_PLUS) | |
1308 | return IFN_LAST; | |
1309 | ||
1310 | /* First two nodes must be a multiply. */ | |
1311 | auto_vec<slp_tree> muls; | |
1312 | if (vect_match_call_complex_mla (child, 0) != MULT_MULT | |
1313 | || vect_match_call_complex_mla (child, 1, &muls) != MULT_MULT) | |
1314 | return IFN_LAST; | |
1315 | ||
1316 | /* Now operand2+4 may lead to another expression. */ | |
1317 | auto_vec<slp_tree> left_op, right_op; | |
1318 | left_op.safe_splice (SLP_TREE_CHILDREN (muls[0])); | |
1319 | right_op.safe_splice (SLP_TREE_CHILDREN (muls[1])); | |
1320 | ||
1321 | bool is_neg = vect_normalize_conj_loc (left_op); | |
1322 | ||
1323 | child = SLP_TREE_CHILDREN ((*ops)[1])[0]; | |
1324 | bool conj_first_operand = false; | |
1325 | if (!vect_validate_multiplication (perm_cache, right_op, left_op, false, | |
1326 | &conj_first_operand, true)) | |
1327 | return IFN_LAST; | |
1328 | ||
1329 | if (!is_neg) | |
1330 | ifn = IFN_COMPLEX_FMS; | |
1331 | else if (is_neg) | |
1332 | ifn = IFN_COMPLEX_FMS_CONJ; | |
1333 | ||
1334 | if (!vect_pattern_validate_optab (ifn, *ref_node)) | |
1335 | return IFN_LAST; | |
1336 | ||
1337 | ops->truncate (0); | |
1338 | ops->create (4); | |
1339 | ||
c3a2bc6d | 1340 | complex_perm_kinds_t kind = linear_loads_p (perm_cache, right_op[0]); |
478e571a TC |
1341 | if (kind == PERM_EVENODD) |
1342 | { | |
1343 | ops->quick_push (child); | |
1344 | ops->quick_push (right_op[0]); | |
1345 | ops->quick_push (right_op[1]); | |
1346 | ops->quick_push (left_op[1]); | |
1347 | } | |
1348 | else if (kind == PERM_TOP) | |
1349 | { | |
1350 | ops->quick_push (child); | |
1351 | ops->quick_push (right_op[1]); | |
1352 | ops->quick_push (right_op[0]); | |
1353 | ops->quick_push (left_op[0]); | |
1354 | } | |
1355 | else if (kind == PERM_EVENEVEN && !is_neg) | |
1356 | { | |
1357 | ops->quick_push (child); | |
1358 | ops->quick_push (right_op[1]); | |
1359 | ops->quick_push (right_op[0]); | |
1360 | ops->quick_push (left_op[0]); | |
1361 | } | |
1362 | else | |
1363 | { | |
1364 | ops->quick_push (child); | |
1365 | ops->quick_push (right_op[1]); | |
1366 | ops->quick_push (right_op[0]); | |
1367 | ops->quick_push (left_op[1]); | |
1368 | } | |
1369 | ||
1370 | return ifn; | |
1371 | } | |
1372 | ||
1373 | /* Attempt to recognize a complex mul pattern. */ | |
1374 | ||
1375 | vect_pattern* | |
1376 | complex_fms_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache, | |
1377 | slp_tree *node) | |
1378 | { | |
1379 | auto_vec<slp_tree> ops; | |
1380 | complex_operation_t op | |
1381 | = vect_detect_pair_op (*node, true, &ops); | |
1382 | internal_fn ifn | |
1383 | = complex_fms_pattern::matches (op, perm_cache, node, &ops); | |
1384 | if (ifn == IFN_LAST) | |
1385 | return NULL; | |
1386 | ||
1387 | return new complex_fms_pattern (node, &ops, ifn); | |
1388 | } | |
1389 | ||
1390 | /* Perform a replacement of the detected complex mul pattern with the new | |
1391 | instruction sequences. */ | |
1392 | ||
1393 | void | |
1394 | complex_fms_pattern::build (vec_info *vinfo) | |
1395 | { | |
1396 | slp_tree node; | |
1397 | unsigned i; | |
5296bd57 TC |
1398 | slp_tree newnode = |
1399 | vect_build_combine_node (this->m_ops[2], this->m_ops[3], *this->m_node); | |
1400 | SLP_TREE_REF_COUNT (this->m_ops[0])++; | |
1401 | SLP_TREE_REF_COUNT (this->m_ops[1])++; | |
1402 | ||
478e571a TC |
1403 | FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node) |
1404 | vect_free_slp_tree (node); | |
1405 | ||
1406 | SLP_TREE_CHILDREN (*this->m_node).release (); | |
1407 | SLP_TREE_CHILDREN (*this->m_node).create (3); | |
1408 | ||
1409 | /* First re-arrange the children. */ | |
1410 | SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[0]); | |
1411 | SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[1]); | |
5296bd57 | 1412 | SLP_TREE_CHILDREN (*this->m_node).quick_push (newnode); |
478e571a TC |
1413 | |
1414 | /* And then rewrite the node itself. */ | |
1415 | complex_pattern::build (vinfo); | |
1416 | } | |
1417 | ||
b50df1e7 TC |
1418 | /******************************************************************************* |
1419 | * complex_operations_pattern class | |
1420 | ******************************************************************************/ | |
1421 | ||
1422 | /* This function combines all the existing pattern matchers above into one class | |
1423 | that shares the functionality between them. The initial match is shared | |
1424 | between all complex operations. */ | |
1425 | ||
1426 | class complex_operations_pattern : public complex_pattern | |
1427 | { | |
1428 | protected: | |
1429 | complex_operations_pattern (slp_tree *node, vec<slp_tree> *m_ops, | |
1430 | internal_fn ifn) | |
1431 | : complex_pattern (node, m_ops, ifn) | |
1432 | { | |
1433 | this->m_num_args = 0; | |
1434 | } | |
1435 | ||
1436 | public: | |
1437 | void build (vec_info *); | |
1438 | static internal_fn | |
1439 | matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *, | |
1440 | vec<slp_tree> *); | |
1441 | ||
1442 | static vect_pattern* | |
1443 | recognize (slp_tree_to_load_perm_map_t *, slp_tree *); | |
1444 | }; | |
1445 | ||
1446 | /* Dummy matches implementation for proxy object. */ | |
1447 | ||
1448 | internal_fn | |
1449 | complex_operations_pattern:: | |
1450 | matches (complex_operation_t /* op */, | |
1451 | slp_tree_to_load_perm_map_t * /* perm_cache */, | |
1452 | slp_tree * /* ref_node */, vec<slp_tree> * /* ops */) | |
1453 | { | |
1454 | return IFN_LAST; | |
1455 | } | |
1456 | ||
1457 | /* Attempt to recognize a complex mul pattern. */ | |
1458 | ||
1459 | vect_pattern* | |
1460 | complex_operations_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache, | |
1461 | slp_tree *node) | |
1462 | { | |
1463 | auto_vec<slp_tree> ops; | |
1464 | complex_operation_t op | |
1465 | = vect_detect_pair_op (*node, true, &ops); | |
1466 | internal_fn ifn = IFN_LAST; | |
1467 | ||
1468 | ifn = complex_fms_pattern::matches (op, perm_cache, node, &ops); | |
1469 | if (ifn != IFN_LAST) | |
1470 | return complex_fms_pattern::mkInstance (node, &ops, ifn); | |
1471 | ||
1472 | ifn = complex_mul_pattern::matches (op, perm_cache, node, &ops); | |
1473 | if (ifn != IFN_LAST) | |
1474 | return complex_mul_pattern::mkInstance (node, &ops, ifn); | |
1475 | ||
1476 | ifn = complex_fma_pattern::matches (op, perm_cache, node, &ops); | |
1477 | if (ifn != IFN_LAST) | |
1478 | return complex_fma_pattern::mkInstance (node, &ops, ifn); | |
1479 | ||
1480 | ifn = complex_add_pattern::matches (op, perm_cache, node, &ops); | |
1481 | if (ifn != IFN_LAST) | |
1482 | return complex_add_pattern::mkInstance (node, &ops, ifn); | |
1483 | ||
1484 | return NULL; | |
1485 | } | |
1486 | ||
1487 | /* Dummy implementation of build. */ | |
1488 | ||
1489 | void | |
1490 | complex_operations_pattern::build (vec_info * /* vinfo */) | |
1491 | { | |
1492 | gcc_unreachable (); | |
1493 | } | |
1494 | ||
7a6c31f0 RB |
1495 | |
1496 | /* The addsub_pattern. */ | |
1497 | ||
1498 | class addsub_pattern : public vect_pattern | |
1499 | { | |
1500 | public: | |
7d810646 RB |
1501 | addsub_pattern (slp_tree *node, internal_fn ifn) |
1502 | : vect_pattern (node, NULL, ifn) {}; | |
7a6c31f0 RB |
1503 | |
1504 | void build (vec_info *); | |
1505 | ||
1506 | static vect_pattern* | |
1507 | recognize (slp_tree_to_load_perm_map_t *, slp_tree *); | |
1508 | }; | |
1509 | ||
1510 | vect_pattern * | |
1511 | addsub_pattern::recognize (slp_tree_to_load_perm_map_t *, slp_tree *node_) | |
1512 | { | |
1513 | slp_tree node = *node_; | |
1514 | if (SLP_TREE_CODE (node) != VEC_PERM_EXPR | |
7d810646 RB |
1515 | || SLP_TREE_CHILDREN (node).length () != 2 |
1516 | || SLP_TREE_LANE_PERMUTATION (node).length () % 2) | |
7a6c31f0 RB |
1517 | return NULL; |
1518 | ||
1519 | /* Match a blend of a plus and a minus op with the same number of plus and | |
1520 | minus lanes on the same operands. */ | |
7d810646 RB |
1521 | unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first; |
1522 | unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first; | |
1523 | if (l0 == l1) | |
1524 | return NULL; | |
1525 | bool l0add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], | |
1526 | PLUS_EXPR); | |
1527 | if (!l0add_p | |
1528 | && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], MINUS_EXPR)) | |
1529 | return NULL; | |
1530 | bool l1add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], | |
1531 | PLUS_EXPR); | |
1532 | if (!l1add_p | |
1533 | && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], MINUS_EXPR)) | |
7a6c31f0 | 1534 | return NULL; |
7d810646 RB |
1535 | |
1536 | slp_tree l0node = SLP_TREE_CHILDREN (node)[l0]; | |
1537 | slp_tree l1node = SLP_TREE_CHILDREN (node)[l1]; | |
1538 | if (!((SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[0] | |
1539 | && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[1]) | |
1540 | || (SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[1] | |
1541 | && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[0]))) | |
7a6c31f0 RB |
1542 | return NULL; |
1543 | ||
1544 | for (unsigned i = 0; i < SLP_TREE_LANE_PERMUTATION (node).length (); ++i) | |
1545 | { | |
1546 | std::pair<unsigned, unsigned> perm = SLP_TREE_LANE_PERMUTATION (node)[i]; | |
7d810646 | 1547 | /* It has to be alternating -, +, -, |
7a6c31f0 RB |
1548 | While we could permute the .ADDSUB inputs and the .ADDSUB output |
1549 | that's only profitable over the add + sub + blend if at least | |
1550 | one of the permute is optimized which we can't determine here. */ | |
7d810646 | 1551 | if (perm.first != ((i & 1) ? l1 : l0) |
7a6c31f0 RB |
1552 | || perm.second != i) |
1553 | return NULL; | |
1554 | } | |
1555 | ||
7d810646 RB |
1556 | /* Now we have either { -, +, -, + ... } (!l0add_p) or { +, -, +, - ... } |
1557 | (l0add_p), see whether we have FMA variants. */ | |
1558 | if (!l0add_p | |
1559 | && vect_match_expression_p (SLP_TREE_CHILDREN (l0node)[0], MULT_EXPR)) | |
1560 | { | |
1561 | /* (c * d) -+ a */ | |
1562 | if (vect_pattern_validate_optab (IFN_VEC_FMADDSUB, node)) | |
1563 | return new addsub_pattern (node_, IFN_VEC_FMADDSUB); | |
1564 | } | |
1565 | else if (l0add_p | |
1566 | && vect_match_expression_p (SLP_TREE_CHILDREN (l1node)[0], MULT_EXPR)) | |
1567 | { | |
1568 | /* (c * d) +- a */ | |
1569 | if (vect_pattern_validate_optab (IFN_VEC_FMSUBADD, node)) | |
1570 | return new addsub_pattern (node_, IFN_VEC_FMSUBADD); | |
1571 | } | |
7a6c31f0 | 1572 | |
7d810646 RB |
1573 | if (!l0add_p && vect_pattern_validate_optab (IFN_VEC_ADDSUB, node)) |
1574 | return new addsub_pattern (node_, IFN_VEC_ADDSUB); | |
1575 | ||
1576 | return NULL; | |
7a6c31f0 RB |
1577 | } |
1578 | ||
1579 | void | |
1580 | addsub_pattern::build (vec_info *vinfo) | |
1581 | { | |
1582 | slp_tree node = *m_node; | |
1583 | ||
7d810646 RB |
1584 | unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first; |
1585 | unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first; | |
1586 | ||
1587 | switch (m_ifn) | |
1588 | { | |
1589 | case IFN_VEC_ADDSUB: | |
1590 | { | |
1591 | slp_tree sub = SLP_TREE_CHILDREN (node)[l0]; | |
1592 | slp_tree add = SLP_TREE_CHILDREN (node)[l1]; | |
1593 | ||
1594 | /* Modify the blend node in-place. */ | |
1595 | SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (sub)[0]; | |
1596 | SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (sub)[1]; | |
1597 | SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++; | |
1598 | SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++; | |
1599 | ||
1600 | /* Build IFN_VEC_ADDSUB from the sub representative operands. */ | |
1601 | stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (sub); | |
1602 | gcall *call = gimple_build_call_internal (IFN_VEC_ADDSUB, 2, | |
1603 | gimple_assign_rhs1 (rep->stmt), | |
1604 | gimple_assign_rhs2 (rep->stmt)); | |
1605 | gimple_call_set_lhs (call, make_ssa_name | |
1606 | (TREE_TYPE (gimple_assign_lhs (rep->stmt)))); | |
1607 | gimple_call_set_nothrow (call, true); | |
1608 | gimple_set_bb (call, gimple_bb (rep->stmt)); | |
1609 | stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, rep); | |
1610 | SLP_TREE_REPRESENTATIVE (node) = new_rep; | |
1611 | STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope; | |
1612 | STMT_SLP_TYPE (new_rep) = pure_slp; | |
1613 | STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node); | |
1614 | STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true; | |
1615 | STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (rep)); | |
1616 | SLP_TREE_CODE (node) = ERROR_MARK; | |
1617 | SLP_TREE_LANE_PERMUTATION (node).release (); | |
1618 | ||
1619 | vect_free_slp_tree (sub); | |
1620 | vect_free_slp_tree (add); | |
1621 | break; | |
1622 | } | |
1623 | case IFN_VEC_FMADDSUB: | |
1624 | case IFN_VEC_FMSUBADD: | |
1625 | { | |
1626 | slp_tree sub, add; | |
1627 | if (m_ifn == IFN_VEC_FMADDSUB) | |
1628 | { | |
1629 | sub = SLP_TREE_CHILDREN (node)[l0]; | |
1630 | add = SLP_TREE_CHILDREN (node)[l1]; | |
1631 | } | |
1632 | else /* m_ifn == IFN_VEC_FMSUBADD */ | |
1633 | { | |
1634 | sub = SLP_TREE_CHILDREN (node)[l1]; | |
1635 | add = SLP_TREE_CHILDREN (node)[l0]; | |
1636 | } | |
1637 | slp_tree mul = SLP_TREE_CHILDREN (sub)[0]; | |
1638 | /* Modify the blend node in-place. */ | |
1639 | SLP_TREE_CHILDREN (node).safe_grow (3, true); | |
1640 | SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (mul)[0]; | |
1641 | SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (mul)[1]; | |
1642 | SLP_TREE_CHILDREN (node)[2] = SLP_TREE_CHILDREN (sub)[1]; | |
1643 | SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++; | |
1644 | SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++; | |
1645 | SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[2])++; | |
1646 | ||
1647 | /* Build IFN_VEC_FMADDSUB from the mul/sub representative operands. */ | |
1648 | stmt_vec_info srep = SLP_TREE_REPRESENTATIVE (sub); | |
1649 | stmt_vec_info mrep = SLP_TREE_REPRESENTATIVE (mul); | |
1650 | gcall *call = gimple_build_call_internal (m_ifn, 3, | |
1651 | gimple_assign_rhs1 (mrep->stmt), | |
1652 | gimple_assign_rhs2 (mrep->stmt), | |
1653 | gimple_assign_rhs2 (srep->stmt)); | |
1654 | gimple_call_set_lhs (call, make_ssa_name | |
1655 | (TREE_TYPE (gimple_assign_lhs (srep->stmt)))); | |
1656 | gimple_call_set_nothrow (call, true); | |
1657 | gimple_set_bb (call, gimple_bb (srep->stmt)); | |
1658 | stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, srep); | |
1659 | SLP_TREE_REPRESENTATIVE (node) = new_rep; | |
1660 | STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope; | |
1661 | STMT_SLP_TYPE (new_rep) = pure_slp; | |
1662 | STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node); | |
1663 | STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true; | |
1664 | STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (srep)); | |
1665 | SLP_TREE_CODE (node) = ERROR_MARK; | |
1666 | SLP_TREE_LANE_PERMUTATION (node).release (); | |
1667 | ||
1668 | vect_free_slp_tree (sub); | |
1669 | vect_free_slp_tree (add); | |
1670 | break; | |
1671 | } | |
1672 | default:; | |
1673 | } | |
7a6c31f0 RB |
1674 | } |
1675 | ||
3ed472af TC |
1676 | /******************************************************************************* |
1677 | * Pattern matching definitions | |
1678 | ******************************************************************************/ | |
1679 | ||
1680 | #define SLP_PATTERN(x) &x::recognize | |
1681 | vect_pattern_decl_t slp_patterns[] | |
1682 | { | |
1683 | /* For least amount of back-tracking and more efficient matching | |
1684 | order patterns from the largest to the smallest. Especially if they | |
1685 | overlap in what they can detect. */ | |
1686 | ||
b50df1e7 | 1687 | SLP_PATTERN (complex_operations_pattern), |
7a6c31f0 | 1688 | SLP_PATTERN (addsub_pattern) |
3ed472af TC |
1689 | }; |
1690 | #undef SLP_PATTERN | |
1691 | ||
1692 | /* Set the number of SLP pattern matchers available. */ | |
1693 | size_t num__slp_patterns = sizeof(slp_patterns)/sizeof(vect_pattern_decl_t); |