]>
Commit | Line | Data |
---|---|---|
015adf41 | 1 | /* Dependency checks for instruction scheduling, shared between ARM and |
2 | AARCH64. | |
3 | ||
d353bf18 | 4 | Copyright (C) 1991-2015 Free Software Foundation, Inc. |
015adf41 | 5 | Contributed by ARM Ltd. |
6 | ||
7 | This file is part of GCC. | |
8 | ||
9 | GCC is free software; you can redistribute it and/or modify it | |
10 | under the terms of the GNU General Public License as published | |
11 | by the Free Software Foundation; either version 3, or (at your | |
12 | option) any later version. | |
13 | ||
14 | GCC is distributed in the hope that it will be useful, but WITHOUT | |
15 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
16 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
17 | License for more details. | |
18 | ||
19 | You should have received a copy of the GNU General Public License | |
20 | along with GCC; see the file COPYING3. If not see | |
21 | <http://www.gnu.org/licenses/>. */ | |
22 | ||
23 | ||
015adf41 | 24 | #include "config.h" |
25 | #include "system.h" | |
26 | #include "coretypes.h" | |
27 | #include "tm.h" | |
28 | #include "tm_p.h" | |
29 | #include "rtl.h" | |
b20a8bb4 | 30 | #include "alias.h" |
015adf41 | 31 | #include "tree.h" |
32 | #include "c-family/c-common.h" | |
33 | #include "rtl.h" | |
fad99894 | 34 | #include "rtl-iter.h" |
015adf41 | 35 | |
98d7984b | 36 | /* In ARMv8-A there's a general expectation that AESE/AESMC |
37 | and AESD/AESIMC sequences of the form: | |
38 | ||
39 | AESE Vn, _ | |
40 | AESMC Vn, Vn | |
41 | ||
42 | will issue both instructions in a single cycle on super-scalar | |
43 | implementations. This function identifies such pairs. */ | |
44 | ||
45 | int | |
50fc2d35 | 46 | aarch_crypto_can_dual_issue (rtx_insn *producer_insn, rtx_insn *consumer_insn) |
98d7984b | 47 | { |
50fc2d35 | 48 | rtx producer_set, consumer_set; |
98d7984b | 49 | rtx producer_src, consumer_src; |
50 | ||
50fc2d35 | 51 | producer_set = single_set (producer_insn); |
52 | consumer_set = single_set (consumer_insn); | |
98d7984b | 53 | |
50fc2d35 | 54 | producer_src = producer_set ? SET_SRC (producer_set) : NULL; |
55 | consumer_src = consumer_set ? SET_SRC (consumer_set) : NULL; | |
98d7984b | 56 | |
57 | if (producer_src && consumer_src | |
58 | && GET_CODE (producer_src) == UNSPEC && GET_CODE (consumer_src) == UNSPEC | |
59 | && ((XINT (producer_src, 1) == UNSPEC_AESE | |
60 | && XINT (consumer_src, 1) == UNSPEC_AESMC) | |
61 | || (XINT (producer_src, 1) == UNSPEC_AESD | |
62 | && XINT (consumer_src, 1) == UNSPEC_AESIMC))) | |
63 | { | |
50fc2d35 | 64 | unsigned int regno = REGNO (SET_DEST (producer_set)); |
98d7984b | 65 | |
50fc2d35 | 66 | return REGNO (SET_DEST (consumer_set)) == regno |
98d7984b | 67 | && REGNO (XVECEXP (consumer_src, 0, 0)) == regno; |
68 | } | |
69 | ||
70 | return 0; | |
71 | } | |
72 | ||
4c849ae7 | 73 | /* Return TRUE if X is either an arithmetic shift left, or |
74 | is a multiplication by a power of two. */ | |
daac2ec8 | 75 | bool |
4c849ae7 | 76 | arm_rtx_shift_left_p (rtx x) |
77 | { | |
78 | enum rtx_code code = GET_CODE (x); | |
dff74f11 | 79 | |
4c849ae7 | 80 | if (code == MULT && CONST_INT_P (XEXP (x, 1)) |
81 | && exact_log2 (INTVAL (XEXP (x, 1))) > 0) | |
82 | return true; | |
83 | ||
84 | if (code == ASHIFT) | |
85 | return true; | |
86 | ||
87 | return false; | |
88 | } | |
89 | ||
90 | static rtx_code shift_rtx_codes[] = | |
91 | { ASHIFT, ROTATE, ASHIFTRT, LSHIFTRT, | |
92 | ROTATERT, ZERO_EXTEND, SIGN_EXTEND }; | |
93 | ||
fad99894 | 94 | /* Traverse PATTERN looking for a sub-rtx with RTX_CODE CODE. |
95 | If FIND_ANY_SHIFT then we are interested in anything which can | |
96 | reasonably be described as a SHIFT RTX. */ | |
4c849ae7 | 97 | static rtx |
98 | arm_find_sub_rtx_with_code (rtx pattern, rtx_code code, bool find_any_shift) | |
99 | { | |
fad99894 | 100 | subrtx_var_iterator::array_type array; |
101 | FOR_EACH_SUBRTX_VAR (iter, array, pattern, NONCONST) | |
102 | { | |
103 | rtx x = *iter; | |
104 | if (find_any_shift) | |
105 | { | |
106 | /* Left shifts might have been canonicalized to a MULT of some | |
107 | power of two. Make sure we catch them. */ | |
108 | if (arm_rtx_shift_left_p (x)) | |
109 | return x; | |
110 | else | |
111 | for (unsigned int i = 0; i < ARRAY_SIZE (shift_rtx_codes); i++) | |
112 | if (GET_CODE (x) == shift_rtx_codes[i]) | |
113 | return x; | |
114 | } | |
115 | ||
116 | if (GET_CODE (x) == code) | |
117 | return x; | |
118 | } | |
119 | return NULL_RTX; | |
4c849ae7 | 120 | } |
121 | ||
122 | /* Traverse PATTERN looking for any sub-rtx which looks like a shift. */ | |
123 | static rtx | |
124 | arm_find_shift_sub_rtx (rtx pattern) | |
125 | { | |
126 | return arm_find_sub_rtx_with_code (pattern, ASHIFT, true); | |
127 | } | |
128 | ||
129 | /* PRODUCER and CONSUMER are two potentially dependant RTX. PRODUCER | |
130 | (possibly) contains a SET which will provide a result we can access | |
131 | using the SET_DEST macro. We will place the RTX which would be | |
132 | written by PRODUCER in SET_SOURCE. | |
133 | Similarly, CONSUMER (possibly) contains a SET which has an operand | |
134 | we can access using SET_SRC. We place this operand in | |
135 | SET_DESTINATION. | |
136 | ||
137 | Return nonzero if we found the SET RTX we expected. */ | |
138 | static int | |
139 | arm_get_set_operands (rtx producer, rtx consumer, | |
140 | rtx *set_source, rtx *set_destination) | |
141 | { | |
fad99894 | 142 | rtx set_producer = arm_find_sub_rtx_with_code (PATTERN (producer), |
143 | SET, false); | |
144 | rtx set_consumer = arm_find_sub_rtx_with_code (PATTERN (consumer), | |
145 | SET, false); | |
4c849ae7 | 146 | |
147 | if (set_producer && set_consumer) | |
148 | { | |
149 | *set_source = SET_DEST (set_producer); | |
150 | *set_destination = SET_SRC (set_consumer); | |
151 | return 1; | |
152 | } | |
153 | return 0; | |
154 | } | |
155 | ||
d049924d | 156 | bool |
3754d046 | 157 | aarch_rev16_shright_mask_imm_p (rtx val, machine_mode mode) |
d049924d | 158 | { |
159 | return CONST_INT_P (val) | |
4ea1b263 | 160 | && INTVAL (val) |
161 | == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff), | |
162 | mode); | |
d049924d | 163 | } |
164 | ||
165 | bool | |
3754d046 | 166 | aarch_rev16_shleft_mask_imm_p (rtx val, machine_mode mode) |
d049924d | 167 | { |
168 | return CONST_INT_P (val) | |
4ea1b263 | 169 | && INTVAL (val) |
170 | == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff00), | |
171 | mode); | |
d049924d | 172 | } |
173 | ||
174 | ||
175 | static bool | |
3754d046 | 176 | aarch_rev16_p_1 (rtx lhs, rtx rhs, machine_mode mode) |
d049924d | 177 | { |
178 | if (GET_CODE (lhs) == AND | |
179 | && GET_CODE (XEXP (lhs, 0)) == ASHIFT | |
180 | && CONST_INT_P (XEXP (XEXP (lhs, 0), 1)) | |
181 | && INTVAL (XEXP (XEXP (lhs, 0), 1)) == 8 | |
182 | && REG_P (XEXP (XEXP (lhs, 0), 0)) | |
183 | && CONST_INT_P (XEXP (lhs, 1)) | |
184 | && GET_CODE (rhs) == AND | |
185 | && GET_CODE (XEXP (rhs, 0)) == LSHIFTRT | |
186 | && REG_P (XEXP (XEXP (rhs, 0), 0)) | |
187 | && CONST_INT_P (XEXP (XEXP (rhs, 0), 1)) | |
188 | && INTVAL (XEXP (XEXP (rhs, 0), 1)) == 8 | |
189 | && CONST_INT_P (XEXP (rhs, 1)) | |
190 | && REGNO (XEXP (XEXP (rhs, 0), 0)) == REGNO (XEXP (XEXP (lhs, 0), 0))) | |
191 | ||
192 | { | |
193 | rtx lhs_mask = XEXP (lhs, 1); | |
194 | rtx rhs_mask = XEXP (rhs, 1); | |
195 | ||
196 | return aarch_rev16_shright_mask_imm_p (rhs_mask, mode) | |
197 | && aarch_rev16_shleft_mask_imm_p (lhs_mask, mode); | |
198 | } | |
199 | ||
200 | return false; | |
201 | } | |
202 | ||
203 | /* Recognise a sequence of bitwise operations corresponding to a rev16 operation. | |
204 | These will be of the form: | |
205 | ((x >> 8) & 0x00ff00ff) | |
206 | | ((x << 8) & 0xff00ff00) | |
207 | for SImode and with similar but wider bitmasks for DImode. | |
208 | The two sub-expressions of the IOR can appear on either side so check both | |
209 | permutations with the help of aarch_rev16_p_1 above. */ | |
210 | ||
211 | bool | |
212 | aarch_rev16_p (rtx x) | |
213 | { | |
214 | rtx left_sub_rtx, right_sub_rtx; | |
215 | bool is_rev = false; | |
216 | ||
217 | if (GET_CODE (x) != IOR) | |
218 | return false; | |
219 | ||
220 | left_sub_rtx = XEXP (x, 0); | |
221 | right_sub_rtx = XEXP (x, 1); | |
222 | ||
223 | /* There are no canonicalisation rules for the position of the two shifts | |
224 | involved in a rev, so try both permutations. */ | |
225 | is_rev = aarch_rev16_p_1 (left_sub_rtx, right_sub_rtx, GET_MODE (x)); | |
226 | ||
227 | if (!is_rev) | |
228 | is_rev = aarch_rev16_p_1 (right_sub_rtx, left_sub_rtx, GET_MODE (x)); | |
229 | ||
230 | return is_rev; | |
231 | } | |
232 | ||
4c849ae7 | 233 | /* Return nonzero if the CONSUMER instruction (a load) does need |
234 | PRODUCER's value to calculate the address. */ | |
235 | int | |
236 | arm_early_load_addr_dep (rtx producer, rtx consumer) | |
237 | { | |
238 | rtx value, addr; | |
239 | ||
240 | if (!arm_get_set_operands (producer, consumer, &value, &addr)) | |
241 | return 0; | |
015adf41 | 242 | |
243 | return reg_overlap_mentioned_p (value, addr); | |
244 | } | |
245 | ||
246 | /* Return nonzero if the CONSUMER instruction (an ALU op) does not | |
247 | have an early register shift value or amount dependency on the | |
248 | result of PRODUCER. */ | |
015adf41 | 249 | int |
250 | arm_no_early_alu_shift_dep (rtx producer, rtx consumer) | |
251 | { | |
4c849ae7 | 252 | rtx value, op; |
015adf41 | 253 | rtx early_op; |
254 | ||
4c849ae7 | 255 | if (!arm_get_set_operands (producer, consumer, &value, &op)) |
256 | return 0; | |
257 | ||
258 | if ((early_op = arm_find_shift_sub_rtx (op))) | |
259 | { | |
260 | if (REG_P (early_op)) | |
261 | early_op = op; | |
262 | ||
263 | return !reg_overlap_mentioned_p (value, early_op); | |
264 | } | |
265 | ||
266 | return 0; | |
015adf41 | 267 | } |
268 | ||
269 | /* Return nonzero if the CONSUMER instruction (an ALU op) does not | |
270 | have an early register shift value dependency on the result of | |
271 | PRODUCER. */ | |
015adf41 | 272 | int |
273 | arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer) | |
274 | { | |
4c849ae7 | 275 | rtx value, op; |
015adf41 | 276 | rtx early_op; |
277 | ||
4c849ae7 | 278 | if (!arm_get_set_operands (producer, consumer, &value, &op)) |
279 | return 0; | |
280 | ||
281 | if ((early_op = arm_find_shift_sub_rtx (op))) | |
282 | /* We want to check the value being shifted. */ | |
283 | if (!reg_overlap_mentioned_p (value, XEXP (early_op, 0))) | |
284 | return 1; | |
285 | ||
286 | return 0; | |
015adf41 | 287 | } |
288 | ||
289 | /* Return nonzero if the CONSUMER (a mul or mac op) does not | |
290 | have an early register mult dependency on the result of | |
291 | PRODUCER. */ | |
015adf41 | 292 | int |
293 | arm_no_early_mul_dep (rtx producer, rtx consumer) | |
294 | { | |
4c849ae7 | 295 | rtx value, op; |
296 | ||
297 | if (!arm_get_set_operands (producer, consumer, &value, &op)) | |
298 | return 0; | |
015adf41 | 299 | |
300 | if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS) | |
301 | { | |
302 | if (GET_CODE (XEXP (op, 0)) == MULT) | |
303 | return !reg_overlap_mentioned_p (value, XEXP (op, 0)); | |
304 | else | |
305 | return !reg_overlap_mentioned_p (value, XEXP (op, 1)); | |
306 | } | |
307 | ||
308 | return 0; | |
309 | } | |
310 | ||
311 | /* Return nonzero if the CONSUMER instruction (a store) does not need | |
312 | PRODUCER's value to calculate the address. */ | |
313 | ||
314 | int | |
315 | arm_no_early_store_addr_dep (rtx producer, rtx consumer) | |
316 | { | |
fad99894 | 317 | rtx value = arm_find_sub_rtx_with_code (PATTERN (producer), SET, false); |
318 | rtx addr = arm_find_sub_rtx_with_code (PATTERN (consumer), SET, false); | |
4c849ae7 | 319 | |
320 | if (value) | |
321 | value = SET_DEST (value); | |
322 | ||
323 | if (addr) | |
324 | addr = SET_DEST (addr); | |
325 | ||
326 | if (!value || !addr) | |
327 | return 0; | |
015adf41 | 328 | |
329 | return !reg_overlap_mentioned_p (value, addr); | |
330 | } | |
331 | ||
332 | /* Return nonzero if the CONSUMER instruction (a store) does need | |
333 | PRODUCER's value to calculate the address. */ | |
334 | ||
335 | int | |
336 | arm_early_store_addr_dep (rtx producer, rtx consumer) | |
337 | { | |
338 | return !arm_no_early_store_addr_dep (producer, consumer); | |
339 | } | |
340 | ||
341 | /* Return non-zero iff the consumer (a multiply-accumulate or a | |
342 | multiple-subtract instruction) has an accumulator dependency on the | |
343 | result of the producer and no other dependency on that result. It | |
344 | does not check if the producer is multiply-accumulate instruction. */ | |
345 | int | |
346 | arm_mac_accumulator_is_result (rtx producer, rtx consumer) | |
347 | { | |
348 | rtx result; | |
349 | rtx op0, op1, acc; | |
350 | ||
351 | producer = PATTERN (producer); | |
352 | consumer = PATTERN (consumer); | |
353 | ||
354 | if (GET_CODE (producer) == COND_EXEC) | |
355 | producer = COND_EXEC_CODE (producer); | |
356 | if (GET_CODE (consumer) == COND_EXEC) | |
357 | consumer = COND_EXEC_CODE (consumer); | |
358 | ||
359 | if (GET_CODE (producer) != SET) | |
360 | return 0; | |
361 | ||
362 | result = XEXP (producer, 0); | |
363 | ||
364 | if (GET_CODE (consumer) != SET) | |
365 | return 0; | |
366 | ||
367 | /* Check that the consumer is of the form | |
368 | (set (...) (plus (mult ...) (...))) | |
369 | or | |
370 | (set (...) (minus (...) (mult ...))). */ | |
371 | if (GET_CODE (XEXP (consumer, 1)) == PLUS) | |
372 | { | |
373 | if (GET_CODE (XEXP (XEXP (consumer, 1), 0)) != MULT) | |
374 | return 0; | |
375 | ||
376 | op0 = XEXP (XEXP (XEXP (consumer, 1), 0), 0); | |
377 | op1 = XEXP (XEXP (XEXP (consumer, 1), 0), 1); | |
378 | acc = XEXP (XEXP (consumer, 1), 1); | |
379 | } | |
380 | else if (GET_CODE (XEXP (consumer, 1)) == MINUS) | |
381 | { | |
382 | if (GET_CODE (XEXP (XEXP (consumer, 1), 1)) != MULT) | |
383 | return 0; | |
384 | ||
385 | op0 = XEXP (XEXP (XEXP (consumer, 1), 1), 0); | |
386 | op1 = XEXP (XEXP (XEXP (consumer, 1), 1), 1); | |
387 | acc = XEXP (XEXP (consumer, 1), 0); | |
388 | } | |
389 | else | |
390 | return 0; | |
391 | ||
392 | return (reg_overlap_mentioned_p (result, acc) | |
393 | && !reg_overlap_mentioned_p (result, op0) | |
394 | && !reg_overlap_mentioned_p (result, op1)); | |
395 | } | |
396 | ||
397 | /* Return non-zero if the consumer (a multiply-accumulate instruction) | |
398 | has an accumulator dependency on the result of the producer (a | |
399 | multiplication instruction) and no other dependency on that result. */ | |
400 | int | |
401 | arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer) | |
402 | { | |
403 | rtx mul = PATTERN (producer); | |
404 | rtx mac = PATTERN (consumer); | |
405 | rtx mul_result; | |
406 | rtx mac_op0, mac_op1, mac_acc; | |
407 | ||
408 | if (GET_CODE (mul) == COND_EXEC) | |
409 | mul = COND_EXEC_CODE (mul); | |
410 | if (GET_CODE (mac) == COND_EXEC) | |
411 | mac = COND_EXEC_CODE (mac); | |
412 | ||
413 | /* Check that mul is of the form (set (...) (mult ...)) | |
414 | and mla is of the form (set (...) (plus (mult ...) (...))). */ | |
415 | if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT) | |
416 | || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS | |
417 | || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT)) | |
418 | return 0; | |
419 | ||
420 | mul_result = XEXP (mul, 0); | |
421 | mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0); | |
422 | mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1); | |
423 | mac_acc = XEXP (XEXP (mac, 1), 1); | |
424 | ||
425 | return (reg_overlap_mentioned_p (mul_result, mac_acc) | |
426 | && !reg_overlap_mentioned_p (mul_result, mac_op0) | |
427 | && !reg_overlap_mentioned_p (mul_result, mac_op1)); | |
428 | } |