]>
Commit | Line | Data |
---|---|---|
3ed472af TC |
1 | /* SLP - Pattern matcher on SLP trees |
2 | Copyright (C) 2020 Free Software Foundation, Inc. | |
3 | ||
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it under | |
7 | the terms of the GNU General Public License as published by the Free | |
8 | Software Foundation; either version 3, or (at your option) any later | |
9 | version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING3. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #include "config.h" | |
21 | #include "system.h" | |
22 | #include "coretypes.h" | |
23 | #include "backend.h" | |
24 | #include "target.h" | |
25 | #include "rtl.h" | |
26 | #include "tree.h" | |
27 | #include "gimple.h" | |
28 | #include "tree-pass.h" | |
29 | #include "ssa.h" | |
30 | #include "optabs-tree.h" | |
31 | #include "insn-config.h" | |
32 | #include "recog.h" /* FIXME: for insn_data */ | |
33 | #include "fold-const.h" | |
34 | #include "stor-layout.h" | |
35 | #include "gimple-iterator.h" | |
36 | #include "cfgloop.h" | |
37 | #include "tree-vectorizer.h" | |
38 | #include "langhooks.h" | |
39 | #include "gimple-walk.h" | |
40 | #include "dbgcnt.h" | |
41 | #include "tree-vector-builder.h" | |
42 | #include "vec-perm-indices.h" | |
43 | #include "gimple-fold.h" | |
44 | #include "internal-fn.h" | |
45 | ||
46 | /* SLP Pattern matching mechanism. | |
47 | ||
48 | This extension to the SLP vectorizer allows one to transform the generated SLP | |
49 | tree based on any pattern. The difference between this and the normal vect | |
50 | pattern matcher is that unlike the former, this matcher allows you to match | |
51 | with instructions that do not belong to the same SSA dominator graph. | |
52 | ||
53 | The only requirement that this pattern matcher has is that you are only | |
54 | allowed to either match an entire group or none. | |
55 | ||
56 | The pattern matcher currently only allows you to perform replacements to | |
57 | internal functions. | |
58 | ||
59 | Once the patterns are matched it is one way, these cannot be undone. It is | |
60 | currently not supported to match patterns recursively. | |
61 | ||
62 | To add a new pattern, implement the vect_pattern class and add the type to | |
63 | slp_patterns. | |
64 | ||
65 | */ | |
66 | ||
67 | /******************************************************************************* | |
68 | * vect_pattern class | |
69 | ******************************************************************************/ | |
70 | ||
71 | /* Default implementation of recognize that performs matching, validation and | |
72 | replacement of nodes but that can be overriden if required. */ | |
73 | ||
74 | static bool | |
75 | vect_pattern_validate_optab (internal_fn ifn, slp_tree node) | |
76 | { | |
77 | tree vectype = SLP_TREE_VECTYPE (node); | |
78 | if (ifn == IFN_LAST || !vectype) | |
79 | return false; | |
80 | ||
81 | if (dump_enabled_p ()) | |
82 | dump_printf_loc (MSG_NOTE, vect_location, | |
83 | "Found %s pattern in SLP tree\n", | |
84 | internal_fn_name (ifn)); | |
85 | ||
86 | if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED)) | |
87 | { | |
88 | if (dump_enabled_p ()) | |
89 | dump_printf_loc (MSG_NOTE, vect_location, | |
90 | "Target supports %s vectorization with mode %T\n", | |
91 | internal_fn_name (ifn), vectype); | |
92 | } | |
93 | else | |
94 | { | |
95 | if (dump_enabled_p ()) | |
96 | { | |
97 | if (!vectype) | |
98 | dump_printf_loc (MSG_NOTE, vect_location, | |
99 | "Target does not support vector type for %T\n", | |
100 | SLP_TREE_DEF_TYPE (node)); | |
101 | else | |
102 | dump_printf_loc (MSG_NOTE, vect_location, | |
103 | "Target does not support %s for vector type " | |
104 | "%T\n", internal_fn_name (ifn), vectype); | |
105 | } | |
106 | return false; | |
107 | } | |
108 | return true; | |
109 | } | |
110 | ||
111 | /******************************************************************************* | |
112 | * General helper types | |
113 | ******************************************************************************/ | |
114 | ||
/* The COMPLEX_OPERATION enum denotes the possible pairs of operations that
   can be matched when looking for the expressions that make up complex
   number addition and mla.  Each name lists the operation of the first node
   followed by the operation of the second node.  */

typedef enum _complex_operation : unsigned
{
  /* Both nodes are additions.  */
  PLUS_PLUS,
  /* A subtraction paired with an addition.  */
  MINUS_PLUS,
  /* An addition paired with a subtraction.  */
  PLUS_MINUS,
  /* Both nodes are multiplications.  */
  MULT_MULT,
  /* No supported pair was recognized.  */
  CMPLX_NONE
} complex_operation_t;
126 | ||
127 | /******************************************************************************* | |
128 | * General helper functions | |
129 | ******************************************************************************/ | |
130 | ||
131 | /* Helper function of linear_loads_p that checks to see if the load permutation | |
132 | is sequential and in monotonically increasing order of loads with no gaps. | |
133 | */ | |
134 | ||
135 | static inline complex_perm_kinds_t | |
136 | is_linear_load_p (load_permutation_t loads) | |
137 | { | |
138 | if (loads.length() == 0) | |
139 | return PERM_UNKNOWN; | |
140 | ||
141 | unsigned load, i; | |
142 | complex_perm_kinds_t candidates[4] | |
143 | = { PERM_EVENODD | |
144 | , PERM_ODDEVEN | |
145 | , PERM_ODDODD | |
146 | , PERM_EVENEVEN | |
147 | }; | |
148 | ||
149 | int valid_patterns = 4; | |
150 | FOR_EACH_VEC_ELT_FROM (loads, i, load, 1) | |
151 | { | |
152 | if (candidates[0] != PERM_UNKNOWN && load != i) | |
153 | { | |
154 | candidates[0] = PERM_UNKNOWN; | |
155 | valid_patterns--; | |
156 | } | |
157 | if (candidates[1] != PERM_UNKNOWN | |
158 | && load != (i % 2 == 0 ? i + 1 : i - 1)) | |
159 | { | |
160 | candidates[1] = PERM_UNKNOWN; | |
161 | valid_patterns--; | |
162 | } | |
163 | if (candidates[2] != PERM_UNKNOWN && load != 1) | |
164 | { | |
165 | candidates[2] = PERM_UNKNOWN; | |
166 | valid_patterns--; | |
167 | } | |
168 | if (candidates[3] != PERM_UNKNOWN && load != 0) | |
169 | { | |
170 | candidates[3] = PERM_UNKNOWN; | |
171 | valid_patterns--; | |
172 | } | |
173 | ||
174 | if (valid_patterns == 0) | |
175 | return PERM_UNKNOWN; | |
176 | } | |
177 | ||
178 | for (i = 0; i < sizeof(candidates); i++) | |
179 | if (candidates[i] != PERM_UNKNOWN) | |
180 | return candidates[i]; | |
181 | ||
182 | return PERM_UNKNOWN; | |
183 | } | |
184 | ||
185 | /* Combine complex_perm_kinds A and B into a new permute kind that describes the | |
186 | resulting operation. */ | |
187 | ||
188 | static inline complex_perm_kinds_t | |
189 | vect_merge_perms (complex_perm_kinds_t a, complex_perm_kinds_t b) | |
190 | { | |
191 | if (a == b) | |
192 | return a; | |
193 | ||
194 | if (a == PERM_TOP) | |
195 | return b; | |
196 | ||
197 | if (b == PERM_TOP) | |
198 | return a; | |
199 | ||
200 | return PERM_UNKNOWN; | |
201 | } | |
202 | ||
203 | /* Check to see if all loads rooted in ROOT are linear. Linearity is | |
204 | defined as having no gaps between values loaded. */ | |
205 | ||
206 | static complex_load_perm_t | |
207 | linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, slp_tree root) | |
208 | { | |
209 | if (!root) | |
210 | return std::make_pair (PERM_UNKNOWN, vNULL); | |
211 | ||
212 | unsigned i; | |
213 | complex_load_perm_t *tmp; | |
214 | ||
215 | if ((tmp = perm_cache->get (root)) != NULL) | |
216 | return *tmp; | |
217 | ||
218 | complex_load_perm_t retval = std::make_pair (PERM_UNKNOWN, vNULL); | |
219 | perm_cache->put (root, retval); | |
220 | ||
221 | /* If it's a load node, then just read the load permute. */ | |
222 | if (SLP_TREE_LOAD_PERMUTATION (root).exists ()) | |
223 | { | |
224 | retval.first = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root)); | |
225 | retval.second = SLP_TREE_LOAD_PERMUTATION (root); | |
226 | perm_cache->put (root, retval); | |
227 | return retval; | |
228 | } | |
229 | else if (SLP_TREE_DEF_TYPE (root) != vect_internal_def) | |
230 | { | |
231 | retval.first = PERM_TOP; | |
232 | return retval; | |
233 | } | |
234 | ||
235 | auto_vec<load_permutation_t> all_loads; | |
236 | complex_perm_kinds_t kind = PERM_TOP; | |
237 | ||
238 | slp_tree child; | |
239 | FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, child) | |
240 | { | |
241 | complex_load_perm_t res = linear_loads_p (perm_cache, child); | |
242 | kind = vect_merge_perms (kind, res.first); | |
159b0bd9 TC |
243 | /* Unknown and Top are not valid on blends as they produce no permute. */ |
244 | if (kind == PERM_UNKNOWN || kind == PERM_TOP) | |
3ed472af TC |
245 | return retval; |
246 | all_loads.safe_push (res.second); | |
247 | } | |
248 | ||
249 | if (SLP_TREE_LANE_PERMUTATION (root).exists ()) | |
250 | { | |
251 | lane_permutation_t perm = SLP_TREE_LANE_PERMUTATION (root); | |
252 | load_permutation_t nloads; | |
253 | nloads.create (SLP_TREE_LANES (root)); | |
254 | nloads.quick_grow (SLP_TREE_LANES (root)); | |
255 | for (i = 0; i < SLP_TREE_LANES (root); i++) | |
256 | nloads[i] = all_loads[perm[i].first][perm[i].second]; | |
257 | ||
258 | retval.first = kind; | |
259 | retval.second = nloads; | |
260 | } | |
261 | else if (all_loads.length () == 1) | |
262 | { | |
263 | retval.first = kind; | |
264 | retval.second = all_loads[0]; | |
265 | } | |
266 | ||
267 | perm_cache->put (root, retval); | |
268 | return retval; | |
269 | } | |
270 | ||
271 | ||
272 | /* This function attempts to make a node rooted in NODE is linear. If the node | |
273 | if already linear than the node itself is returned in RESULT. | |
274 | ||
275 | If the node is not linear then a new VEC_PERM_EXPR node is created with a | |
276 | lane permute that when applied will make the node linear. If such a | |
277 | permute cannot be created then FALSE is returned from the function. | |
278 | ||
279 | Here linearity is defined as having a sequential, monotically increasing | |
280 | load position inside the load permute generated by the loads reachable from | |
281 | NODE. */ | |
282 | ||
283 | static slp_tree | |
284 | vect_build_swap_evenodd_node (slp_tree node) | |
285 | { | |
286 | /* Attempt to linearise the permute. */ | |
287 | vec<std::pair<unsigned, unsigned> > zipped; | |
288 | zipped.create (SLP_TREE_LANES (node)); | |
289 | ||
290 | for (unsigned x = 0; x < SLP_TREE_LANES (node); x+=2) | |
291 | { | |
292 | zipped.quick_push (std::make_pair (0, x+1)); | |
293 | zipped.quick_push (std::make_pair (0, x)); | |
294 | } | |
295 | ||
296 | /* Create the new permute node and store it instead. */ | |
297 | slp_tree vnode = vect_create_new_slp_node (1, VEC_PERM_EXPR); | |
298 | SLP_TREE_LANE_PERMUTATION (vnode) = zipped; | |
299 | SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (node); | |
300 | SLP_TREE_CHILDREN (vnode).quick_push (node); | |
301 | SLP_TREE_REF_COUNT (vnode) = 1; | |
302 | SLP_TREE_LANES (vnode) = SLP_TREE_LANES (node); | |
303 | SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (node); | |
304 | SLP_TREE_REF_COUNT (node)++; | |
305 | return vnode; | |
306 | } | |
307 | ||
308 | /* Checks to see of the expression represented by NODE is a gimple assign with | |
309 | code CODE. */ | |
310 | ||
311 | static inline bool | |
312 | vect_match_expression_p (slp_tree node, tree_code code) | |
313 | { | |
314 | if (!node | |
315 | || !SLP_TREE_REPRESENTATIVE (node)) | |
316 | return false; | |
317 | ||
318 | gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node)); | |
319 | if (!is_gimple_assign (expr) | |
320 | || gimple_assign_rhs_code (expr) != code) | |
321 | return false; | |
322 | ||
323 | return true; | |
324 | } | |
325 | ||
326 | /* Check if the given lane permute in PERMUTES matches an alternating sequence | |
327 | of {even odd even odd ...}. This to account for unrolled loops. Further | |
328 | mode there resulting permute must be linear. */ | |
329 | ||
330 | static inline bool | |
331 | vect_check_evenodd_blend (lane_permutation_t &permutes, | |
332 | unsigned even, unsigned odd) | |
333 | { | |
334 | if (permutes.length () == 0) | |
335 | return false; | |
336 | ||
337 | unsigned val[2] = {even, odd}; | |
338 | unsigned seed = 0; | |
339 | for (unsigned i = 0; i < permutes.length (); i++) | |
340 | if (permutes[i].first != val[i % 2] | |
341 | || permutes[i].second != seed++) | |
342 | return false; | |
343 | ||
344 | return true; | |
345 | } | |
346 | ||
347 | /* This function will match the two gimple expressions representing NODE1 and | |
348 | NODE2 in parallel and returns the pair operation that represents the two | |
349 | expressions in the two statements. | |
350 | ||
351 | If match is successful then the corresponding complex_operation is | |
352 | returned and the arguments to the two matched operations are returned in OPS. | |
353 | ||
354 | If TWO_OPERANDS it is expected that the LANES of the parent VEC_PERM select | |
355 | from the two nodes alternatingly. | |
356 | ||
357 | If unsuccessful then CMPLX_NONE is returned and OPS is untouched. | |
358 | ||
359 | e.g. the following gimple statements | |
360 | ||
361 | stmt 0 _39 = _37 + _12; | |
362 | stmt 1 _6 = _38 - _36; | |
363 | ||
364 | will return PLUS_MINUS along with OPS containing {_37, _12, _38, _36}. | |
365 | */ | |
366 | ||
367 | static complex_operation_t | |
368 | vect_detect_pair_op (slp_tree node1, slp_tree node2, lane_permutation_t &lanes, | |
369 | bool two_operands = true, vec<slp_tree> *ops = NULL) | |
370 | { | |
371 | complex_operation_t result = CMPLX_NONE; | |
372 | ||
373 | if (vect_match_expression_p (node1, MINUS_EXPR) | |
374 | && vect_match_expression_p (node2, PLUS_EXPR) | |
375 | && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1))) | |
376 | result = MINUS_PLUS; | |
377 | else if (vect_match_expression_p (node1, PLUS_EXPR) | |
378 | && vect_match_expression_p (node2, MINUS_EXPR) | |
379 | && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1))) | |
380 | result = PLUS_MINUS; | |
381 | else if (vect_match_expression_p (node1, PLUS_EXPR) | |
382 | && vect_match_expression_p (node2, PLUS_EXPR)) | |
383 | result = PLUS_PLUS; | |
384 | else if (vect_match_expression_p (node1, MULT_EXPR) | |
385 | && vect_match_expression_p (node2, MULT_EXPR)) | |
386 | result = MULT_MULT; | |
387 | ||
388 | if (result != CMPLX_NONE && ops != NULL) | |
389 | { | |
390 | ops->create (2); | |
391 | ops->quick_push (node1); | |
392 | ops->quick_push (node2); | |
393 | } | |
394 | return result; | |
395 | } | |
396 | ||
397 | /* Overload of vect_detect_pair_op that matches against the representative | |
398 | statements in the children of NODE. It is expected that NODE has exactly | |
399 | two children and when TWO_OPERANDS then NODE must be a VEC_PERM. */ | |
400 | ||
401 | static complex_operation_t | |
402 | vect_detect_pair_op (slp_tree node, bool two_operands = true, | |
403 | vec<slp_tree> *ops = NULL) | |
404 | { | |
405 | if (!two_operands && SLP_TREE_CODE (node) == VEC_PERM_EXPR) | |
406 | return CMPLX_NONE; | |
407 | ||
408 | if (SLP_TREE_CHILDREN (node).length () != 2) | |
409 | return CMPLX_NONE; | |
410 | ||
411 | vec<slp_tree> children = SLP_TREE_CHILDREN (node); | |
412 | lane_permutation_t &lanes = SLP_TREE_LANE_PERMUTATION (node); | |
413 | ||
414 | return vect_detect_pair_op (children[0], children[1], lanes, two_operands, | |
415 | ops); | |
416 | } | |
417 | ||
418 | /******************************************************************************* | |
419 | * complex_pattern class | |
420 | ******************************************************************************/ | |
421 | ||
422 | /* SLP Complex Numbers pattern matching. | |
423 | ||
424 | As an example, the following simple loop: | |
425 | ||
426 | double a[restrict N]; double b[restrict N]; double c[restrict N]; | |
427 | ||
428 | for (int i=0; i < N; i+=2) | |
429 | { | |
430 | c[i] = a[i] - b[i+1]; | |
431 | c[i+1] = a[i+1] + b[i]; | |
432 | } | |
433 | ||
434 | which represents a complex addition with a rotation of 90* around the | |
435 | argand plane. i.e. if `a` and `b` were complex numbers then this would be the | |
436 | same as `a + (b * I)`. | |
437 | ||
438 | Here the expressions for `c[i]` and `c[i+1]` are independent but have to be | |
439 | both recognized in order for the pattern to work. As an SLP tree this is | |
440 | represented as | |
441 | ||
442 | +--------------------------------+ | |
443 | | stmt 0 *_9 = _10; | | |
444 | | stmt 1 *_15 = _16; | | |
445 | +--------------------------------+ | |
446 | | | |
447 | | | |
448 | v | |
449 | +--------------------------------+ | |
450 | | stmt 0 _10 = _4 - _8; | | |
451 | | stmt 1 _16 = _12 + _14; | | |
452 | | lane permutation { 0[0] 1[1] } | | |
453 | +--------------------------------+ | |
454 | | | | |
455 | | | | |
456 | | | | |
457 | +-----+ | | +-----+ | |
458 | | | | | | | | |
459 | +-----| { } |<-----+ +----->| { } --------+ | |
460 | | | | +------------------| | | | |
461 | | +-----+ | +-----+ | | |
462 | | | | | | |
463 | | | | | | |
464 | | +------|------------------+ | | |
465 | | | | | | |
466 | v v v v | |
467 | +--------------------------+ +--------------------------------+ | |
468 | | stmt 0 _8 = *_7; | | stmt 0 _4 = *_3; | | |
469 | | stmt 1 _14 = *_13; | | stmt 1 _12 = *_11; | | |
470 | | load permutation { 1 0 } | | load permutation { 0 1 } | | |
471 | +--------------------------+ +--------------------------------+ | |
472 | ||
473 | The pattern matcher allows you to replace both statements 0 and 1 or none at | |
474 | all. Because this operation is a two operands operation the actual nodes | |
475 | being replaced are those in the { } nodes. The actual scalar statements | |
476 | themselves are not replaced or used during the matching but instead the | |
477 | SLP_TREE_REPRESENTATIVE statements are inspected. You are also allowed to | |
478 | replace and match on any number of nodes. | |
479 | ||
480 | Because the pattern matcher matches on the representative statement for the | |
481 | SLP node, in the case of two_operators it allows you to match the children of | |
482 | the node. This is done using the method `recognize ()`. | |
483 | ||
484 | */ | |
485 | ||
486 | /* The complex_pattern class contains common code for pattern matchers that work | |
487 | on complex numbers. These provide functionality to allow de-construction and | |
488 | validation of sequences depicting/transforming REAL and IMAG pairs. */ | |
489 | ||
490 | class complex_pattern : public vect_pattern | |
491 | { | |
492 | protected: | |
493 | auto_vec<slp_tree> m_workset; | |
494 | complex_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn) | |
495 | : vect_pattern (node, m_ops, ifn) | |
496 | { | |
497 | this->m_workset.safe_push (*node); | |
498 | } | |
499 | ||
500 | public: | |
501 | void build (vec_info *); | |
502 | ||
503 | static internal_fn | |
504 | matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, | |
505 | vec<slp_tree> *); | |
506 | }; | |
507 | ||
508 | /* Create a replacement pattern statement for each node in m_node and inserts | |
509 | the new statement into m_node as the new representative statement. The old | |
510 | statement is marked as being in a pattern defined by the new statement. The | |
511 | statement is created as call to internal function IFN with m_num_args | |
512 | arguments. | |
513 | ||
514 | Futhermore the new pattern is also added to the vectorization information | |
515 | structure VINFO and the old statement STMT_INFO is marked as unused while | |
516 | the new statement is marked as used and the number of SLP uses of the new | |
517 | statement is incremented. | |
518 | ||
519 | The newly created SLP nodes are marked as SLP only and will be dissolved | |
520 | if SLP is aborted. | |
521 | ||
522 | The newly created gimple call is returned and the BB remains unchanged. | |
523 | ||
524 | This default method is designed to only match against simple operands where | |
525 | all the input and output types are the same. | |
526 | */ | |
527 | ||
528 | void | |
529 | complex_pattern::build (vec_info *vinfo) | |
530 | { | |
531 | stmt_vec_info stmt_info; | |
532 | ||
533 | auto_vec<tree> args; | |
534 | args.create (this->m_num_args); | |
535 | args.quick_grow_cleared (this->m_num_args); | |
536 | slp_tree node; | |
537 | unsigned ix; | |
538 | stmt_vec_info call_stmt_info; | |
539 | gcall *call_stmt = NULL; | |
540 | ||
541 | /* Now modify the nodes themselves. */ | |
542 | FOR_EACH_VEC_ELT (this->m_workset, ix, node) | |
543 | { | |
544 | /* Calculate the location of the statement in NODE to replace. */ | |
545 | stmt_info = SLP_TREE_REPRESENTATIVE (node); | |
546 | gimple* old_stmt = STMT_VINFO_STMT (stmt_info); | |
547 | tree lhs_old_stmt = gimple_get_lhs (old_stmt); | |
548 | tree type = TREE_TYPE (lhs_old_stmt); | |
549 | ||
550 | /* Create the argument set for use by gimple_build_call_internal_vec. */ | |
551 | for (unsigned i = 0; i < this->m_num_args; i++) | |
552 | args[i] = lhs_old_stmt; | |
553 | ||
554 | /* Create the new pattern statements. */ | |
555 | call_stmt = gimple_build_call_internal_vec (this->m_ifn, args); | |
556 | tree var = make_temp_ssa_name (type, call_stmt, "slp_patt"); | |
557 | gimple_call_set_lhs (call_stmt, var); | |
558 | gimple_set_location (call_stmt, gimple_location (old_stmt)); | |
559 | gimple_call_set_nothrow (call_stmt, true); | |
560 | ||
561 | /* Adjust the book-keeping for the new and old statements for use during | |
562 | SLP. This is required to get the right VF and statement during SLP | |
563 | analysis. These changes are created after relevancy has been set for | |
564 | the nodes as such we need to manually update them. Any changes will be | |
565 | undone if SLP is cancelled. */ | |
566 | call_stmt_info | |
567 | = vinfo->add_pattern_stmt (call_stmt, stmt_info); | |
568 | ||
569 | /* Make sure to mark the representative statement pure_slp and | |
570 | relevant. */ | |
571 | STMT_VINFO_RELEVANT (call_stmt_info) = vect_used_in_scope; | |
572 | STMT_SLP_TYPE (call_stmt_info) = pure_slp; | |
573 | ||
574 | /* add_pattern_stmt can't be done in vect_mark_pattern_stmts because | |
575 | the non-SLP pattern matchers already have added the statement to VINFO | |
576 | by the time it is called. Some of them need to modify the returned | |
577 | stmt_info. vect_mark_pattern_stmts is called by recog_pattern and it | |
578 | would increase the size of each pattern with boilerplate code to make | |
579 | the call there. */ | |
580 | vect_mark_pattern_stmts (vinfo, stmt_info, call_stmt, | |
581 | SLP_TREE_VECTYPE (node)); | |
582 | STMT_VINFO_SLP_VECT_ONLY (call_stmt_info) = true; | |
583 | ||
584 | /* Since we are replacing all the statements in the group with the same | |
585 | thing it doesn't really matter. So just set it every time a new stmt | |
586 | is created. */ | |
587 | SLP_TREE_REPRESENTATIVE (node) = call_stmt_info; | |
588 | SLP_TREE_LANE_PERMUTATION (node).release (); | |
589 | SLP_TREE_CODE (node) = CALL_EXPR; | |
590 | } | |
591 | } | |
592 | ||
593 | /******************************************************************************* | |
594 | * complex_add_pattern class | |
595 | ******************************************************************************/ | |
596 | ||
597 | class complex_add_pattern : public complex_pattern | |
598 | { | |
599 | protected: | |
600 | complex_add_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn) | |
601 | : complex_pattern (node, m_ops, ifn) | |
602 | { | |
603 | this->m_num_args = 2; | |
604 | } | |
605 | ||
606 | public: | |
607 | void build (vec_info *); | |
608 | static internal_fn | |
609 | matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, | |
610 | vec<slp_tree> *); | |
611 | ||
612 | static vect_pattern* | |
613 | recognize (slp_tree_to_load_perm_map_t *, slp_tree *); | |
614 | }; | |
615 | ||
616 | /* Perform a replacement of the detected complex add pattern with the new | |
617 | instruction sequences. */ | |
618 | ||
619 | void | |
620 | complex_add_pattern::build (vec_info *vinfo) | |
621 | { | |
622 | auto_vec<slp_tree> nodes; | |
623 | slp_tree node = this->m_ops[0]; | |
624 | vec<slp_tree> children = SLP_TREE_CHILDREN (node); | |
625 | ||
626 | /* First re-arrange the children. */ | |
627 | nodes.create (children.length ()); | |
628 | nodes.quick_push (children[0]); | |
629 | nodes.quick_push (vect_build_swap_evenodd_node (children[1])); | |
630 | ||
631 | SLP_TREE_CHILDREN (*this->m_node).truncate (0); | |
632 | SLP_TREE_CHILDREN (*this->m_node).safe_splice (nodes); | |
633 | ||
634 | complex_pattern::build (vinfo); | |
635 | } | |
636 | ||
637 | /* Pattern matcher for trying to match complex addition pattern in SLP tree. | |
638 | ||
639 | If no match is found then IFN is set to IFN_LAST. | |
640 | This function matches the patterns shaped as: | |
641 | ||
642 | c[i] = a[i] - b[i+1]; | |
643 | c[i+1] = a[i+1] + b[i]; | |
644 | ||
645 | If a match occurred then TRUE is returned, else FALSE. The initial match is | |
646 | expected to be in OP1 and the initial match operands in args0. */ | |
647 | ||
648 | internal_fn | |
649 | complex_add_pattern::matches (complex_operation_t op, | |
650 | slp_tree_to_load_perm_map_t *perm_cache, | |
651 | vec<slp_tree> *ops) | |
652 | { | |
653 | internal_fn ifn = IFN_LAST; | |
654 | ||
655 | /* Find the two components. Rotation in the complex plane will modify | |
656 | the operations: | |
657 | ||
658 | * Rotation 0: + + | |
659 | * Rotation 90: - + | |
660 | * Rotation 180: - - | |
661 | * Rotation 270: + - | |
662 | ||
663 | Rotation 0 and 180 can be handled by normal SIMD code, so we don't need | |
664 | to care about them here. */ | |
665 | if (op == MINUS_PLUS) | |
666 | ifn = IFN_COMPLEX_ADD_ROT90; | |
667 | else if (op == PLUS_MINUS) | |
668 | ifn = IFN_COMPLEX_ADD_ROT270; | |
669 | else | |
670 | return ifn; | |
671 | ||
672 | /* verify that there is a permute, otherwise this isn't a pattern we | |
673 | we support. */ | |
674 | gcc_assert (ops->length () == 2); | |
675 | ||
676 | vec<slp_tree> children = SLP_TREE_CHILDREN ((*ops)[0]); | |
677 | ||
678 | /* First node must be unpermuted. */ | |
679 | if (linear_loads_p (perm_cache, children[0]).first != PERM_EVENODD) | |
680 | return IFN_LAST; | |
681 | ||
682 | /* Second node must be permuted. */ | |
683 | if (linear_loads_p (perm_cache, children[1]).first != PERM_ODDEVEN) | |
684 | return IFN_LAST; | |
685 | ||
686 | return ifn; | |
687 | } | |
688 | ||
689 | /* Attempt to recognize a complex add pattern. */ | |
690 | ||
691 | vect_pattern* | |
692 | complex_add_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache, | |
693 | slp_tree *node) | |
694 | { | |
695 | auto_vec<slp_tree> ops; | |
696 | complex_operation_t op | |
697 | = vect_detect_pair_op (*node, true, &ops); | |
698 | internal_fn ifn = complex_add_pattern::matches (op, perm_cache, &ops); | |
699 | if (!vect_pattern_validate_optab (ifn, *node)) | |
700 | return NULL; | |
701 | ||
702 | return new complex_add_pattern (node, &ops, ifn); | |
703 | } | |
704 | ||
705 | /******************************************************************************* | |
706 | * Pattern matching definitions | |
707 | ******************************************************************************/ | |
708 | ||
709 | #define SLP_PATTERN(x) &x::recognize | |
710 | vect_pattern_decl_t slp_patterns[] | |
711 | { | |
712 | /* For least amount of back-tracking and more efficient matching | |
713 | order patterns from the largest to the smallest. Especially if they | |
714 | overlap in what they can detect. */ | |
715 | ||
716 | SLP_PATTERN (complex_add_pattern), | |
717 | }; | |
718 | #undef SLP_PATTERN | |
719 | ||
720 | /* Set the number of SLP pattern matchers available. */ | |
721 | size_t num__slp_patterns = sizeof(slp_patterns)/sizeof(vect_pattern_decl_t); |