]>
Commit | Line | Data |
---|---|---|
015adf41 | 1 | /* Dependency checks for instruction scheduling, shared between ARM and |
2 | AARCH64. | |
3 | ||
f1717362 | 4 | Copyright (C) 1991-2016 Free Software Foundation, Inc. |
015adf41 | 5 | Contributed by ARM Ltd. |
6 | ||
7 | This file is part of GCC. | |
8 | ||
9 | GCC is free software; you can redistribute it and/or modify it | |
10 | under the terms of the GNU General Public License as published | |
11 | by the Free Software Foundation; either version 3, or (at your | |
12 | option) any later version. | |
13 | ||
14 | GCC is distributed in the hope that it will be useful, but WITHOUT | |
15 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
16 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
17 | License for more details. | |
18 | ||
19 | You should have received a copy of the GNU General Public License | |
20 | along with GCC; see the file COPYING3. If not see | |
21 | <http://www.gnu.org/licenses/>. */ | |
22 | ||
23 | ||
015adf41 | 24 | #include "config.h" |
25 | #include "system.h" | |
26 | #include "coretypes.h" | |
27 | #include "tm.h" | |
015adf41 | 28 | #include "rtl.h" |
fad99894 | 29 | #include "rtl-iter.h" |
015adf41 | 30 | |
98d7984b | 31 | /* In ARMv8-A there's a general expectation that AESE/AESMC |
32 | and AESD/AESIMC sequences of the form: | |
33 | ||
34 | AESE Vn, _ | |
35 | AESMC Vn, Vn | |
36 | ||
37 | will issue both instructions in a single cycle on super-scalar | |
38 | implementations. This function identifies such pairs. */ | |
39 | ||
40 | int | |
50fc2d35 | 41 | aarch_crypto_can_dual_issue (rtx_insn *producer_insn, rtx_insn *consumer_insn) |
98d7984b | 42 | { |
50fc2d35 | 43 | rtx producer_set, consumer_set; |
98d7984b | 44 | rtx producer_src, consumer_src; |
45 | ||
50fc2d35 | 46 | producer_set = single_set (producer_insn); |
47 | consumer_set = single_set (consumer_insn); | |
98d7984b | 48 | |
50fc2d35 | 49 | producer_src = producer_set ? SET_SRC (producer_set) : NULL; |
50 | consumer_src = consumer_set ? SET_SRC (consumer_set) : NULL; | |
98d7984b | 51 | |
52 | if (producer_src && consumer_src | |
53 | && GET_CODE (producer_src) == UNSPEC && GET_CODE (consumer_src) == UNSPEC | |
54 | && ((XINT (producer_src, 1) == UNSPEC_AESE | |
55 | && XINT (consumer_src, 1) == UNSPEC_AESMC) | |
56 | || (XINT (producer_src, 1) == UNSPEC_AESD | |
57 | && XINT (consumer_src, 1) == UNSPEC_AESIMC))) | |
58 | { | |
50fc2d35 | 59 | unsigned int regno = REGNO (SET_DEST (producer_set)); |
98d7984b | 60 | |
50fc2d35 | 61 | return REGNO (SET_DEST (consumer_set)) == regno |
98d7984b | 62 | && REGNO (XVECEXP (consumer_src, 0, 0)) == regno; |
63 | } | |
64 | ||
65 | return 0; | |
66 | } | |
67 | ||
4c849ae7 | 68 | /* Return TRUE if X is either an arithmetic shift left, or |
69 | is a multiplication by a power of two. */ | |
daac2ec8 | 70 | bool |
4c849ae7 | 71 | arm_rtx_shift_left_p (rtx x) |
72 | { | |
73 | enum rtx_code code = GET_CODE (x); | |
dff74f11 | 74 | |
4c849ae7 | 75 | if (code == MULT && CONST_INT_P (XEXP (x, 1)) |
76 | && exact_log2 (INTVAL (XEXP (x, 1))) > 0) | |
77 | return true; | |
78 | ||
79 | if (code == ASHIFT) | |
80 | return true; | |
81 | ||
82 | return false; | |
83 | } | |
84 | ||
/* RTX codes which can reasonably be described as shift operations,
   used by arm_find_sub_rtx_with_code when FIND_ANY_SHIFT is set.
   NOTE(review): the extends are listed here as shift-like as well —
   presumably because they occupy the shift pipeline on the targeted
   cores; confirm against the relevant pipeline descriptions.  */
static rtx_code shift_rtx_codes[] =
  { ASHIFT, ROTATE, ASHIFTRT, LSHIFTRT,
    ROTATERT, ZERO_EXTEND, SIGN_EXTEND };
88 | ||
/* Traverse PATTERN looking for a sub-rtx with RTX_CODE CODE.
   If FIND_ANY_SHIFT then we are interested in anything which can
   reasonably be described as a SHIFT RTX.
   Return the first matching sub-rtx found, or NULL_RTX if none.  */
static rtx
arm_find_sub_rtx_with_code (rtx pattern, rtx_code code, bool find_any_shift)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, pattern, NONCONST)
    {
      rtx x = *iter;
      if (find_any_shift)
	{
	  /* Left shifts might have been canonicalized to a MULT of some
	     power of two.  Make sure we catch them.  */
	  if (arm_rtx_shift_left_p (x))
	    return x;
	  else
	    /* Otherwise match any of the shift-like codes in the
	       shift_rtx_codes table above.  */
	    for (unsigned int i = 0; i < ARRAY_SIZE (shift_rtx_codes); i++)
	      if (GET_CODE (x) == shift_rtx_codes[i])
		return x;
	}

      /* Regardless of FIND_ANY_SHIFT, an exact CODE match also counts.  */
      if (GET_CODE (x) == code)
	return x;
    }
  return NULL_RTX;
}
116 | ||
117 | /* Traverse PATTERN looking for any sub-rtx which looks like a shift. */ | |
118 | static rtx | |
119 | arm_find_shift_sub_rtx (rtx pattern) | |
120 | { | |
121 | return arm_find_sub_rtx_with_code (pattern, ASHIFT, true); | |
122 | } | |
123 | ||
124 | /* PRODUCER and CONSUMER are two potentially dependant RTX. PRODUCER | |
125 | (possibly) contains a SET which will provide a result we can access | |
126 | using the SET_DEST macro. We will place the RTX which would be | |
127 | written by PRODUCER in SET_SOURCE. | |
128 | Similarly, CONSUMER (possibly) contains a SET which has an operand | |
129 | we can access using SET_SRC. We place this operand in | |
130 | SET_DESTINATION. | |
131 | ||
132 | Return nonzero if we found the SET RTX we expected. */ | |
133 | static int | |
134 | arm_get_set_operands (rtx producer, rtx consumer, | |
135 | rtx *set_source, rtx *set_destination) | |
136 | { | |
fad99894 | 137 | rtx set_producer = arm_find_sub_rtx_with_code (PATTERN (producer), |
138 | SET, false); | |
139 | rtx set_consumer = arm_find_sub_rtx_with_code (PATTERN (consumer), | |
140 | SET, false); | |
4c849ae7 | 141 | |
142 | if (set_producer && set_consumer) | |
143 | { | |
144 | *set_source = SET_DEST (set_producer); | |
145 | *set_destination = SET_SRC (set_consumer); | |
146 | return 1; | |
147 | } | |
148 | return 0; | |
149 | } | |
150 | ||
d049924d | 151 | bool |
3754d046 | 152 | aarch_rev16_shright_mask_imm_p (rtx val, machine_mode mode) |
d049924d | 153 | { |
154 | return CONST_INT_P (val) | |
4ea1b263 | 155 | && INTVAL (val) |
156 | == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff), | |
157 | mode); | |
d049924d | 158 | } |
159 | ||
160 | bool | |
3754d046 | 161 | aarch_rev16_shleft_mask_imm_p (rtx val, machine_mode mode) |
d049924d | 162 | { |
163 | return CONST_INT_P (val) | |
4ea1b263 | 164 | && INTVAL (val) |
165 | == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff00), | |
166 | mode); | |
d049924d | 167 | } |
168 | ||
169 | ||
170 | static bool | |
3754d046 | 171 | aarch_rev16_p_1 (rtx lhs, rtx rhs, machine_mode mode) |
d049924d | 172 | { |
173 | if (GET_CODE (lhs) == AND | |
174 | && GET_CODE (XEXP (lhs, 0)) == ASHIFT | |
175 | && CONST_INT_P (XEXP (XEXP (lhs, 0), 1)) | |
176 | && INTVAL (XEXP (XEXP (lhs, 0), 1)) == 8 | |
177 | && REG_P (XEXP (XEXP (lhs, 0), 0)) | |
178 | && CONST_INT_P (XEXP (lhs, 1)) | |
179 | && GET_CODE (rhs) == AND | |
180 | && GET_CODE (XEXP (rhs, 0)) == LSHIFTRT | |
181 | && REG_P (XEXP (XEXP (rhs, 0), 0)) | |
182 | && CONST_INT_P (XEXP (XEXP (rhs, 0), 1)) | |
183 | && INTVAL (XEXP (XEXP (rhs, 0), 1)) == 8 | |
184 | && CONST_INT_P (XEXP (rhs, 1)) | |
185 | && REGNO (XEXP (XEXP (rhs, 0), 0)) == REGNO (XEXP (XEXP (lhs, 0), 0))) | |
186 | ||
187 | { | |
188 | rtx lhs_mask = XEXP (lhs, 1); | |
189 | rtx rhs_mask = XEXP (rhs, 1); | |
190 | ||
191 | return aarch_rev16_shright_mask_imm_p (rhs_mask, mode) | |
192 | && aarch_rev16_shleft_mask_imm_p (lhs_mask, mode); | |
193 | } | |
194 | ||
195 | return false; | |
196 | } | |
197 | ||
198 | /* Recognise a sequence of bitwise operations corresponding to a rev16 operation. | |
199 | These will be of the form: | |
200 | ((x >> 8) & 0x00ff00ff) | |
201 | | ((x << 8) & 0xff00ff00) | |
202 | for SImode and with similar but wider bitmasks for DImode. | |
203 | The two sub-expressions of the IOR can appear on either side so check both | |
204 | permutations with the help of aarch_rev16_p_1 above. */ | |
205 | ||
206 | bool | |
207 | aarch_rev16_p (rtx x) | |
208 | { | |
209 | rtx left_sub_rtx, right_sub_rtx; | |
210 | bool is_rev = false; | |
211 | ||
212 | if (GET_CODE (x) != IOR) | |
213 | return false; | |
214 | ||
215 | left_sub_rtx = XEXP (x, 0); | |
216 | right_sub_rtx = XEXP (x, 1); | |
217 | ||
218 | /* There are no canonicalisation rules for the position of the two shifts | |
219 | involved in a rev, so try both permutations. */ | |
220 | is_rev = aarch_rev16_p_1 (left_sub_rtx, right_sub_rtx, GET_MODE (x)); | |
221 | ||
222 | if (!is_rev) | |
223 | is_rev = aarch_rev16_p_1 (right_sub_rtx, left_sub_rtx, GET_MODE (x)); | |
224 | ||
225 | return is_rev; | |
226 | } | |
227 | ||
4c849ae7 | 228 | /* Return nonzero if the CONSUMER instruction (a load) does need |
229 | PRODUCER's value to calculate the address. */ | |
230 | int | |
231 | arm_early_load_addr_dep (rtx producer, rtx consumer) | |
232 | { | |
233 | rtx value, addr; | |
234 | ||
235 | if (!arm_get_set_operands (producer, consumer, &value, &addr)) | |
236 | return 0; | |
015adf41 | 237 | |
238 | return reg_overlap_mentioned_p (value, addr); | |
239 | } | |
240 | ||
241 | /* Return nonzero if the CONSUMER instruction (an ALU op) does not | |
242 | have an early register shift value or amount dependency on the | |
243 | result of PRODUCER. */ | |
015adf41 | 244 | int |
245 | arm_no_early_alu_shift_dep (rtx producer, rtx consumer) | |
246 | { | |
4c849ae7 | 247 | rtx value, op; |
015adf41 | 248 | rtx early_op; |
249 | ||
4c849ae7 | 250 | if (!arm_get_set_operands (producer, consumer, &value, &op)) |
251 | return 0; | |
252 | ||
253 | if ((early_op = arm_find_shift_sub_rtx (op))) | |
254 | { | |
255 | if (REG_P (early_op)) | |
256 | early_op = op; | |
257 | ||
258 | return !reg_overlap_mentioned_p (value, early_op); | |
259 | } | |
260 | ||
261 | return 0; | |
015adf41 | 262 | } |
263 | ||
264 | /* Return nonzero if the CONSUMER instruction (an ALU op) does not | |
265 | have an early register shift value dependency on the result of | |
266 | PRODUCER. */ | |
015adf41 | 267 | int |
268 | arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer) | |
269 | { | |
4c849ae7 | 270 | rtx value, op; |
015adf41 | 271 | rtx early_op; |
272 | ||
4c849ae7 | 273 | if (!arm_get_set_operands (producer, consumer, &value, &op)) |
274 | return 0; | |
275 | ||
276 | if ((early_op = arm_find_shift_sub_rtx (op))) | |
277 | /* We want to check the value being shifted. */ | |
278 | if (!reg_overlap_mentioned_p (value, XEXP (early_op, 0))) | |
279 | return 1; | |
280 | ||
281 | return 0; | |
015adf41 | 282 | } |
283 | ||
284 | /* Return nonzero if the CONSUMER (a mul or mac op) does not | |
285 | have an early register mult dependency on the result of | |
286 | PRODUCER. */ | |
015adf41 | 287 | int |
288 | arm_no_early_mul_dep (rtx producer, rtx consumer) | |
289 | { | |
4c849ae7 | 290 | rtx value, op; |
291 | ||
292 | if (!arm_get_set_operands (producer, consumer, &value, &op)) | |
293 | return 0; | |
015adf41 | 294 | |
295 | if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS) | |
296 | { | |
297 | if (GET_CODE (XEXP (op, 0)) == MULT) | |
298 | return !reg_overlap_mentioned_p (value, XEXP (op, 0)); | |
299 | else | |
300 | return !reg_overlap_mentioned_p (value, XEXP (op, 1)); | |
301 | } | |
302 | ||
303 | return 0; | |
304 | } | |
305 | ||
306 | /* Return nonzero if the CONSUMER instruction (a store) does not need | |
307 | PRODUCER's value to calculate the address. */ | |
308 | ||
309 | int | |
310 | arm_no_early_store_addr_dep (rtx producer, rtx consumer) | |
311 | { | |
fad99894 | 312 | rtx value = arm_find_sub_rtx_with_code (PATTERN (producer), SET, false); |
313 | rtx addr = arm_find_sub_rtx_with_code (PATTERN (consumer), SET, false); | |
4c849ae7 | 314 | |
315 | if (value) | |
316 | value = SET_DEST (value); | |
317 | ||
318 | if (addr) | |
319 | addr = SET_DEST (addr); | |
320 | ||
321 | if (!value || !addr) | |
322 | return 0; | |
015adf41 | 323 | |
324 | return !reg_overlap_mentioned_p (value, addr); | |
325 | } | |
326 | ||
327 | /* Return nonzero if the CONSUMER instruction (a store) does need | |
328 | PRODUCER's value to calculate the address. */ | |
329 | ||
330 | int | |
331 | arm_early_store_addr_dep (rtx producer, rtx consumer) | |
332 | { | |
333 | return !arm_no_early_store_addr_dep (producer, consumer); | |
334 | } | |
335 | ||
/* Return non-zero iff the consumer (a multiply-accumulate or a
   multiple-subtract instruction) has an accumulator dependency on the
   result of the producer and no other dependency on that result.  It
   does not check if the producer is multiply-accumulate instruction.  */
int
arm_mac_accumulator_is_result (rtx producer, rtx consumer)
{
  rtx result;
  rtx op0, op1, acc;

  producer = PATTERN (producer);
  consumer = PATTERN (consumer);

  /* Look through conditional-execution wrappers to the underlying
     operation on both sides.  */
  if (GET_CODE (producer) == COND_EXEC)
    producer = COND_EXEC_CODE (producer);
  if (GET_CODE (consumer) == COND_EXEC)
    consumer = COND_EXEC_CODE (consumer);

  if (GET_CODE (producer) != SET)
    return 0;

  /* The destination written by the producer.  */
  result = XEXP (producer, 0);

  if (GET_CODE (consumer) != SET)
    return 0;

  /* Check that the consumer is of the form
     (set (...) (plus (mult ...) (...)))
     or
     (set (...) (minus (...) (mult ...))).  */
  if (GET_CODE (XEXP (consumer, 1)) == PLUS)
    {
      if (GET_CODE (XEXP (XEXP (consumer, 1), 0)) != MULT)
	return 0;

      /* Multiply-accumulate: multiplicands and accumulator operand.  */
      op0 = XEXP (XEXP (XEXP (consumer, 1), 0), 0);
      op1 = XEXP (XEXP (XEXP (consumer, 1), 0), 1);
      acc = XEXP (XEXP (consumer, 1), 1);
    }
  else if (GET_CODE (XEXP (consumer, 1)) == MINUS)
    {
      if (GET_CODE (XEXP (XEXP (consumer, 1), 1)) != MULT)
	return 0;

      /* Multiply-subtract: the MULT is the subtrahend and the
	 accumulator is the minuend.  */
      op0 = XEXP (XEXP (XEXP (consumer, 1), 1), 0);
      op1 = XEXP (XEXP (XEXP (consumer, 1), 1), 1);
      acc = XEXP (XEXP (consumer, 1), 0);
    }
  else
    return 0;

  /* The producer's result must feed only the accumulator operand and
     neither multiplicand.  */
  return (reg_overlap_mentioned_p (result, acc)
	  && !reg_overlap_mentioned_p (result, op0)
	  && !reg_overlap_mentioned_p (result, op1));
}
391 | ||
/* Return non-zero if the destination of PRODUCER feeds the accumulator
   operand of an MLA-like operation.  */

int
aarch_accumulator_forwarding (rtx_insn *producer, rtx_insn *consumer)
{
  rtx producer_set = single_set (producer);
  rtx consumer_set = single_set (consumer);

  /* We are looking for a SET feeding a SET.  */
  if (!producer_set || !consumer_set)
    return 0;

  rtx dest = SET_DEST (producer_set);
  rtx mla = SET_SRC (consumer_set);

  /* We're looking for a register SET.  */
  if (!REG_P (dest))
    return 0;

  rtx accumulator;

  /* Strip a zero_extend.  */
  if (GET_CODE (mla) == ZERO_EXTEND)
    mla = XEXP (mla, 0);

  /* Pick out the accumulator operand according to the shape of the
     consumer's source expression.  */
  switch (GET_CODE (mla))
    {
    case PLUS:
      /* Possibly an MADD.  */
      if (GET_CODE (XEXP (mla, 0)) == MULT)
	accumulator = XEXP (mla, 1);
      else
	return 0;
      break;
    case MINUS:
      /* Possibly an MSUB.  */
      if (GET_CODE (XEXP (mla, 1)) == MULT)
	accumulator = XEXP (mla, 0);
      else
	return 0;
      break;
    case FMA:
      {
	/* Possibly an FMADD/FMSUB/FNMADD/FNMSUB.  */
	if (REG_P (XEXP (mla, 1))
	    && REG_P (XEXP (mla, 2))
	    && (REG_P (XEXP (mla, 0))
		|| GET_CODE (XEXP (mla, 0)) == NEG))

	  {
	    /* FMADD/FMSUB.  */
	    accumulator = XEXP (mla, 2);
	  }
	else if (REG_P (XEXP (mla, 1))
		 && GET_CODE (XEXP (mla, 2)) == NEG
		 && (REG_P (XEXP (mla, 0))
		     || GET_CODE (XEXP (mla, 0)) == NEG))
	  {
	    /* FNMADD/FNMSUB.  The accumulator sits under the NEG.  */
	    accumulator = XEXP (XEXP (mla, 2), 0);
	  }
	else
	  return 0;
	break;
      }
    default:
      /* Not an MLA-like operation.  */
      return 0;
    }

  /* Look through a SUBREG of the accumulator to the underlying
     register.  */
  if (GET_CODE (accumulator) == SUBREG)
    accumulator = SUBREG_REG (accumulator);

  if (!REG_P (accumulator))
    return 0;

  /* Forwarding happens when the producer writes exactly the register
     used as the accumulator.  */
  return (REGNO (dest) == REGNO (accumulator));
}
471 | ||
472 | /* Return nonzero if the CONSUMER instruction is some sort of | |
473 | arithmetic or logic + shift operation, and the register we are | |
474 | writing in PRODUCER is not used in a register shift by register | |
475 | operation. */ | |
476 | ||
477 | int | |
478 | aarch_forward_to_shift_is_not_shifted_reg (rtx_insn *producer, | |
479 | rtx_insn *consumer) | |
480 | { | |
481 | rtx value, op; | |
482 | rtx early_op; | |
483 | ||
484 | if (!arm_get_set_operands (producer, consumer, &value, &op)) | |
485 | return 0; | |
486 | ||
487 | if ((early_op = arm_find_shift_sub_rtx (op))) | |
488 | { | |
489 | if (REG_P (early_op)) | |
490 | early_op = op; | |
491 | ||
492 | /* Any other canonicalisation of a shift is a shift-by-constant | |
493 | so we don't care. */ | |
494 | if (GET_CODE (early_op) == ASHIFT) | |
495 | return (!REG_P (XEXP (early_op, 0)) | |
496 | || !REG_P (XEXP (early_op, 1))); | |
497 | else | |
498 | return 1; | |
499 | } | |
500 | ||
501 | return 0; | |
502 | } | |
503 | ||
/* Return non-zero if the consumer (a multiply-accumulate instruction)
   has an accumulator dependency on the result of the producer (a
   multiplication instruction) and no other dependency on that result.  */
int
arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
{
  rtx mul = PATTERN (producer);
  rtx mac = PATTERN (consumer);
  rtx mul_result;
  rtx mac_op0, mac_op1, mac_acc;

  /* Look through conditional-execution wrappers on both patterns.  */
  if (GET_CODE (mul) == COND_EXEC)
    mul = COND_EXEC_CODE (mul);
  if (GET_CODE (mac) == COND_EXEC)
    mac = COND_EXEC_CODE (mac);

  /* Check that mul is of the form (set (...) (mult ...))
     and mla is of the form (set (...) (plus (mult ...) (...))).  */
  if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
      || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
	  || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
    return 0;

  /* Destination of the multiply, and the MAC's multiplicands and
     accumulator operand.  */
  mul_result = XEXP (mul, 0);
  mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
  mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
  mac_acc = XEXP (XEXP (mac, 1), 1);

  /* The multiply's result must feed only the accumulator operand,
     not either multiplicand.  */
  return (reg_overlap_mentioned_p (mul_result, mac_acc)
	  && !reg_overlap_mentioned_p (mul_result, mac_op0)
	  && !reg_overlap_mentioned_p (mul_result, mac_op1));
}