]>
Commit | Line | Data |
---|---|---|
b2ca46df | 1 | ;; Samsung Exynos M1 pipeline description |
cbe34bb5 | 2 | ;; Copyright (C) 2014-2017 Free Software Foundation, Inc. |
b2ca46df EM |
3 | ;; |
4 | ;; This file is part of GCC. | |
5 | ;; | |
6 | ;; GCC is free software; you can redistribute it and/or modify it | |
7 | ;; under the terms of the GNU General Public License as published by | |
8 | ;; the Free Software Foundation; either version 3, or (at your option) | |
9 | ;; any later version. | |
10 | ;; | |
11 | ;; GCC is distributed in the hope that it will be useful, but | |
12 | ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | ;; General Public License for more details. | |
15 | ;; | |
16 | ;; You should have received a copy of the GNU General Public License | |
17 | ;; along with GCC; see the file COPYING3. If not see | |
18 | ;; <http://www.gnu.org/licenses/>. | |
19 | ||
20 | (define_attr "exynos_m1_neon_type" ; coarse Neon class computed from the generic "type" attribute | |
21 | "neon_arith_simple, neon_arith_basic, neon_arith_complex, | |
22 | neon_multiply, neon_mla, neon_mla_q, neon_mla_long, neon_sat_mla_long, | |
23 | neon_shift_acc, neon_shift_imm_basic, neon_shift_imm_complex, | |
24 | neon_shift_reg_basic, neon_shift_reg_basic_q, | |
25 | neon_shift_reg_complex, neon_shift_reg_complex_q, | |
26 | neon_fp_unary, neon_fp_add, neon_fp_abd, neon_fp_compare, | |
27 | neon_fp_reduc_minmax, neon_fp_reduc_add, neon_fp_round, neon_fp_cvt, | |
28 | neon_fp_minmax, neon_fp_mul, neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, | |
29 | neon_fp_estimate, neon_fp_estimatex, neon_fp_step, | |
30 | neon_bitops, neon_bitops_q, neon_bitins, | |
31 | neon_to_gp, neon_from_gp, neon_move, neon_tbl, | |
32 | neon_load1_1, neon_load1_2, neon_load1_3, neon_load1_4, | |
33 | neon_load1_one, neon_load1_all, | |
34 | neon_load2_2, neon_load2_one, neon_load2_all, | |
35 | neon_load3_3, neon_load3_one, neon_load3_all, | |
36 | neon_load4_4, neon_load4_one, neon_load4_all, | |
37 | neon_store, | |
38 | neon_store1_1, neon_store1_2, neon_store1_3, neon_store1_4, neon_store1_one, | |
39 | neon_store2_2, neon_store2_one, | |
40 | neon_store3_3, neon_store3_one, | |
41 | neon_store4_4, neon_store4_one, | |
42 | unknown" | |
43 | (cond [ ; tests are tried in order; the first matching "type" list decides the value | |
44 | (eq_attr "type" "neon_abd, neon_abd_q, neon_abd_long,\ | |
45 | neon_abs, neon_abs_q,\ | |
46 | neon_minmax, neon_minmax_q") | |
47 | (const_string "neon_arith_simple") | |
48 | ||
49 | (eq_attr "type" "neon_add, neon_add_q, neon_add_long,\ | |
50 | neon_neg, neon_neg_q,\ | |
51 | neon_sub, neon_sub_q, neon_sub_long, neon_sub_widen,\ | |
52 | neon_logic, neon_logic_q, neon_tst, neon_tst_q,\ | |
53 | neon_compare_zero, neon_compare_zero_q") | |
54 | (const_string "neon_arith_basic") | |
55 | ||
56 | (eq_attr "type" "neon_add_widen, neon_arith_acc, neon_arith_acc_q,\ | |
57 | neon_reduc_add, neon_reduc_add_q,\ | |
58 | neon_reduc_add_acc, neon_reduc_add_acc_q,\ | |
59 | neon_reduc_add_long, neon_add_halve_narrow_q,\ | |
60 | neon_add_halve, neon_add_halve_q,\ | |
61 | neon_sub_halve, neon_sub_halve_q, neon_qabs,\ | |
62 | neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\ | |
63 | neon_qneg_q, neon_qsub, neon_qsub_q,\ | |
64 | neon_sub_halve_narrow_q,\ | |
65 | neon_compare, neon_compare_q,\ | |
66 | neon_reduc_minmax, neon_reduc_minmax_q") | |
67 | (const_string "neon_arith_complex") | |
68 | ||
69 | (eq_attr "type" "neon_mul_b, neon_mul_b_q, neon_mul_h, neon_mul_h_q,\ | |
70 | neon_mul_s, neon_mul_s_q,\ | |
71 | neon_mul_h_scalar, neon_mul_h_scalar_q,\ | |
72 | neon_mul_s_scalar, neon_mul_s_scalar_q,\ | |
73 | neon_mul_h_scalar_long, neon_mul_s_scalar_long,\ | |
74 | neon_sat_mul_b, neon_sat_mul_b_q,\ | |
75 | neon_sat_mul_h, neon_sat_mul_h_q,\ | |
76 | neon_sat_mul_s, neon_sat_mul_s_q,\ | |
77 | neon_sat_mul_h_scalar, neon_sat_mul_h_scalar_q,\ | |
78 | neon_sat_mul_s_scalar, neon_sat_mul_s_scalar_q,\ | |
79 | neon_sat_mul_b_long, neon_sat_mul_h_long,\ | |
80 | neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\ | |
81 | neon_sat_mul_s_scalar_long") | |
82 | (const_string "neon_multiply") | |
83 | ||
84 | (eq_attr "type" "neon_mla_b, neon_mla_h, neon_mla_s,\ | |
85 | neon_mla_h_scalar, neon_mla_s_scalar,\ | |
86 | neon_mla_b_long, neon_mla_h_long,\ | |
87 | neon_mla_s_long,\ | |
88 | neon_mla_h_scalar_long, neon_mla_s_scalar_long,\ | |
89 | neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\ | |
90 | neon_mla_h_scalar_q, neon_mla_s_scalar_q") | |
91 | (const_string "neon_mla") | |
92 | ||
93 | (eq_attr "type" "neon_sat_mla_b_long, neon_sat_mla_h_long,\ | |
94 | neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\ | |
95 | neon_sat_mla_s_scalar_long") | |
96 | (const_string "neon_sat_mla_long") | |
97 | ||
98 | (eq_attr "type" "neon_shift_acc, neon_shift_acc_q") | |
99 | (const_string "neon_shift_acc") | |
100 | ||
101 | (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\ | |
102 | neon_shift_imm_narrow_q, neon_shift_imm_long") | |
103 | (const_string "neon_shift_imm_basic") | |
104 | ||
105 | (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\ | |
106 | neon_sat_shift_imm_narrow_q") | |
107 | (const_string "neon_shift_imm_complex") | |
108 | ||
109 | (eq_attr "type" "neon_shift_reg, neon_shift_reg_q") | |
110 | (const_string "neon_shift_reg_basic") | |
111 | ||
112 | (eq_attr "type" "neon_sat_shift_reg, neon_sat_shift_reg_q") | |
113 | (const_string "neon_shift_reg_complex") | |
114 | ||
115 | (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\ | |
116 | neon_fp_abs_s, neon_fp_abs_s_q,\ | |
117 | neon_fp_neg_d, neon_fp_neg_d_q,\ | |
118 | neon_fp_abs_d, neon_fp_abs_d_q") | |
119 | (const_string "neon_fp_unary") | |
120 | ||
121 | (eq_attr "type" "neon_fp_addsub_s, neon_fp_addsub_s_q,\ | |
122 | neon_fp_addsub_d, neon_fp_addsub_d_q") | |
123 | (const_string "neon_fp_add") | |
124 | ||
125 | (eq_attr "type" "neon_fp_abd_s, neon_fp_abd_s_q,\ | |
126 | neon_fp_abd_d, neon_fp_abd_d_q") | |
127 | (const_string "neon_fp_abd") | |
128 | ||
129 | (eq_attr "type" "neon_fp_compare_s, neon_fp_compare_s_q,\ | |
130 | neon_fp_compare_d, neon_fp_compare_d_q,\ | |
131 | neon_fp_minmax_s, neon_fp_minmax_s_q,\ | |
132 | neon_fp_minmax_d, neon_fp_minmax_d_q") | |
133 | (const_string "neon_fp_compare") ; the neon_fp_minmax_* types are classed as compares here | |
134 | ||
135 | (eq_attr "type" "neon_fp_reduc_minmax_s, neon_fp_reduc_minmax_s_q,\ | |
136 | neon_fp_reduc_minmax_d, neon_fp_reduc_minmax_d_q") | |
137 | (const_string "neon_fp_reduc_minmax") | |
138 | ||
139 | (eq_attr "type" "neon_fp_reduc_add_s, neon_fp_reduc_add_s_q,\ | |
140 | neon_fp_reduc_add_d, neon_fp_reduc_add_d_q") | |
141 | (const_string "neon_fp_reduc_add") | |
142 | ||
143 | (eq_attr "type" "neon_fp_round_s, neon_fp_round_s_q,\ | |
144 | neon_fp_round_d, neon_fp_round_d_q") | |
145 | (const_string "neon_fp_round") | |
146 | ||
147 | (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h, | |
148 | neon_fp_to_int_s, neon_fp_to_int_s_q,\ | |
149 | neon_fp_to_int_d_q, neon_fp_to_int_d,\ | |
150 | neon_int_to_fp_s, neon_int_to_fp_s_q,\ | |
151 | neon_int_to_fp_d, neon_int_to_fp_d_q") | |
152 | (const_string "neon_fp_cvt") | |
153 | ||
154 | (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_q,\ | |
155 | neon_fp_mul_s_scalar, neon_fp_mul_s_scalar_q,\ | |
156 | neon_fp_mul_d, neon_fp_mul_d_q,\ | |
157 | neon_fp_mul_d_scalar_q") | |
158 | (const_string "neon_fp_mul") | |
159 | ||
160 | (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_q,\ | |
161 | neon_fp_mla_s_scalar,neon_fp_mla_s_scalar_q,\ | |
162 | neon_fp_mla_d, neon_fp_mla_d_q,\ | |
163 | neon_fp_mla_d_scalar_q") | |
164 | (const_string "neon_fp_mla") | |
165 | ||
166 | (eq_attr "type" "neon_fp_recpe_s, neon_fp_recpe_s_q,\ | |
167 | neon_fp_rsqrte_s, neon_fp_rsqrte_s_q,\ | |
168 | neon_fp_recpe_d, neon_fp_recpe_d_q,\ | |
169 | neon_fp_rsqrte_d, neon_fp_rsqrte_d_q") | |
170 | (const_string "neon_fp_estimate") | |
171 | ||
172 | (eq_attr "type" "neon_fp_recpx_s, neon_fp_recpx_s_q,\ | |
173 | neon_fp_recpx_d, neon_fp_recpx_d_q") | |
174 | (const_string "neon_fp_estimatex") | |
175 | ||
176 | (eq_attr "type" "neon_fp_recps_s, neon_fp_recps_s_q,\ | |
177 | neon_fp_rsqrts_s, neon_fp_rsqrts_s_q,\ | |
178 | neon_fp_recps_d, neon_fp_recps_d_q,\ | |
179 | neon_fp_rsqrts_d, neon_fp_rsqrts_d_q") | |
180 | (const_string "neon_fp_step") | |
181 | ||
182 | (eq_attr "type" "neon_rbit, neon_rbit_q,\ | |
183 | neon_cls, neon_cls_q, neon_cnt, neon_cnt_q,\ | |
184 | neon_dup, neon_dup_q,\ | |
185 | neon_rev, neon_rev_q,\ | |
186 | neon_move, neon_move_q, | |
187 | neon_ext, neon_permute, neon_zip") | |
188 | (const_string "neon_bitops") ; the _q forms of ext, permute and zip are split out just below | |
189 | ||
190 | (eq_attr "type" "neon_ext_q, neon_permute_q, neon_zip_q") | |
191 | (const_string "neon_bitops_q") | |
192 | ||
193 | (eq_attr "type" "neon_bsl, neon_bsl_q") | |
194 | (const_string "neon_bitins") | |
195 | ||
196 | (eq_attr "type" "neon_tbl1, neon_tbl2, neon_tbl3, neon_tbl4") | |
197 | (const_string "neon_tbl") | |
198 | ||
199 | (eq_attr "type" "neon_from_gp, neon_from_gp_q, f_mcr, f_mcrr") | |
200 | (const_string "neon_from_gp") | |
201 | ||
202 | (eq_attr "type" "neon_to_gp, neon_to_gp_q, f_mrc, f_mrrc") | |
203 | (const_string "neon_to_gp") | |
204 | ||
205 | (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q") | |
206 | (const_string "neon_load1_1") | |
207 | ||
208 | (eq_attr "type" "neon_load1_2reg, neon_load1_2reg_q") | |
209 | (const_string "neon_load1_2") | |
210 | ||
211 | (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q") | |
212 | (const_string "neon_load1_3") | |
213 | ||
214 | (eq_attr "type" "neon_load1_4reg, neon_load1_4reg_q") | |
215 | (const_string "neon_load1_4") | |
216 | ||
217 | (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q") | |
218 | (const_string "neon_load1_one") | |
219 | ||
220 | (eq_attr "type" "neon_load1_all_lanes, neon_load1_all_lanes_q") | |
221 | (const_string "neon_load1_all") | |
222 | ||
223 | (eq_attr "type" "neon_load2_2reg, neon_load2_2reg_q,\ | |
224 | neon_load2_4reg, neon_load2_4reg_q") | |
225 | (const_string "neon_load2_2") ; the neon_load2_4reg types are folded in here as well | |
226 | ||
227 | (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q") | |
228 | (const_string "neon_load2_one") | |
229 | ||
230 | (eq_attr "type" "neon_load2_all_lanes, neon_load2_all_lanes_q") | |
231 | (const_string "neon_load2_all") | |
232 | ||
233 | (eq_attr "type" "neon_load3_3reg, neon_load3_3reg_q") | |
234 | (const_string "neon_load3_3") | |
235 | ||
236 | (eq_attr "type" "neon_load3_one_lane, neon_load3_one_lane_q") | |
237 | (const_string "neon_load3_one") | |
238 | ||
239 | (eq_attr "type" "neon_load3_all_lanes, neon_load3_all_lanes_q") | |
240 | (const_string "neon_load3_all") | |
241 | ||
242 | (eq_attr "type" "neon_load4_4reg, neon_load4_4reg_q") | |
243 | (const_string "neon_load4_4") | |
244 | ||
245 | (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q") | |
246 | (const_string "neon_load4_one") | |
247 | ||
248 | (eq_attr "type" "neon_load4_all_lanes, neon_load4_all_lanes_q") | |
249 | (const_string "neon_load4_all") | |
250 | ||
b2ca46df EM |
251 | (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q") |
252 | (const_string "neon_store1_1") | |
253 | ||
254 | (eq_attr "type" "neon_store1_2reg, neon_store1_2reg_q") | |
255 | (const_string "neon_store1_2") | |
256 | ||
257 | (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q") | |
258 | (const_string "neon_store1_3") | |
259 | ||
260 | (eq_attr "type" "neon_store1_4reg, neon_store1_4reg_q") | |
261 | (const_string "neon_store1_4") | |
262 | ||
263 | (eq_attr "type" "neon_store1_one_lane, neon_store1_one_lane_q") | |
264 | (const_string "neon_store1_one") | |
265 | ||
266 | (eq_attr "type" "neon_store2_2reg, neon_store2_2reg_q,\ | |
267 | neon_store2_4reg, neon_store2_4reg_q") | |
268 | (const_string "neon_store2_2") ; the neon_store2_4reg types are folded in here as well | |
269 | ||
270 | (eq_attr "type" "neon_store2_one_lane, neon_store2_one_lane_q") | |
271 | (const_string "neon_store2_one") | |
272 | ||
273 | (eq_attr "type" "neon_store3_3reg, neon_store3_3reg_q") | |
274 | (const_string "neon_store3_3") | |
275 | ||
276 | (eq_attr "type" "neon_store3_one_lane, neon_store3_one_lane_q") | |
277 | (const_string "neon_store3_one") | |
278 | ||
279 | (eq_attr "type" "neon_store4_4reg, neon_store4_4reg_q") | |
280 | (const_string "neon_store4_4") | |
281 | ||
282 | (eq_attr "type" "neon_store4_one_lane, neon_store4_one_lane_q") | |
283 | (const_string "neon_store4_one")] | |
284 | ||
285 | (const_string "unknown"))) ; default value when none of the tests above matches | |
286 | ||
287 | ;; The Exynos M1 core is modeled as a triple issue pipeline that has | |
288 | ;; the following functional units. | |
289 | ||
290 | (define_automaton "exynos_m1_gp") ; holds the integer pipes and the branch unit | |
291 | (define_automaton "exynos_m1_ls") ; holds the load and store units | |
292 | (define_automaton "exynos_m1_fp") ; holds the two Neon/FP pipes | |
293 | ||
294 | ;; 1. Two pipelines for simple integer operations: A, B | |
295 | ;; 2. One pipeline for simple or complex integer operations: C | |
296 | ||
297 | (define_cpu_unit "em1_xa, em1_xb, em1_xc" "exynos_m1_gp") ; pipes A, B and C | |
298 | ||
299 | (define_reservation "em1_alu" "(em1_xa | em1_xb | em1_xc)") ; any one of the three pipes | |
300 | (define_reservation "em1_c" "em1_xc") ; pipe C specifically | |
301 | ||
302 | ;; 3. Two asymmetric pipelines for Neon and FP operations: F0, F1 | |
303 | ||
304 | (define_cpu_unit "em1_f0, em1_f1" "exynos_m1_fp") ; pipes F0 and F1 | |
305 | ||
306 | (define_reservation "em1_fmac" "em1_f0") ; the aliases below only name which pipe an operation class uses | |
307 | (define_reservation "em1_fcvt" "em1_f0") | |
308 | (define_reservation "em1_nalu" "(em1_f0 | em1_f1)") ; either pipe | |
309 | (define_reservation "em1_nalu0" "em1_f0") | |
310 | (define_reservation "em1_nalu1" "em1_f1") | |
311 | (define_reservation "em1_nmisc" "em1_f0") | |
312 | (define_reservation "em1_ncrypt" "em1_f0") | |
313 | (define_reservation "em1_fadd" "em1_f1") | |
314 | (define_reservation "em1_fvar" "em1_f1") | |
315 | (define_reservation "em1_fst" "em1_f1") | |
316 | ||
317 | ;; 4. One pipeline for branch operations: BX | |
318 | ||
319 | (define_cpu_unit "em1_bx" "exynos_m1_gp") ; declared in the same automaton as the integer pipes | |
320 | ||
321 | (define_reservation "em1_br" "em1_bx") ; alias for the BX unit | |
322 | ||
323 | ;; 5. One AGU for loads: L | |
324 | ;; One AGU for stores and one pipeline for stores: S, SD | |
325 | ||
326 | (define_cpu_unit "em1_lx" "exynos_m1_ls") ; load AGU (L) | |
327 | (define_cpu_unit "em1_sx, em1_sd" "exynos_m1_ls") ; store AGU (S) and store pipeline (SD) | |
328 | ||
329 | (define_reservation "em1_ld" "em1_lx") | |
330 | (define_reservation "em1_st" "(em1_sx + em1_sd)") ; a store takes both store units in the same cycle | |
331 | ||
332 | ;; Common occurrences | |
333 | (define_reservation "em1_sfst" "(em1_fst + em1_st)") ; em1_f1 together with both store units | |
334 | (define_reservation "em1_lfst" "(em1_fst + em1_ld)") ; em1_f1 together with the load AGU | |
335 | ||
336 | ;; Branches | |
337 | ;; | |
338 | ;; No latency as there is no result | |
339 | ;; TODO: Unconditional branches use no units; | |
340 | ;; conditional branches add the BX unit; | |
341 | ;; indirect branches add the C unit. | |
342 | (define_insn_reservation "exynos_m1_branch" 0 ; latency 0, as explained in the note above | |
343 | (and (eq_attr "tune" "exynosm1") | |
344 | (eq_attr "type" "branch")) | |
345 | "em1_br") ; every "branch"-type insn is charged the BX unit; the finer split is still a TODO | |
346 | ||
347 | (define_insn_reservation "exynos_m1_call" 1 ; 1-cycle latency | |
348 | (and (eq_attr "tune" "exynosm1") | |
349 | (eq_attr "type" "call")) | |
350 | "em1_alu") ; one cycle on any of em1_xa, em1_xb or em1_xc; em1_bx is not reserved | |
351 | ||
352 | ;; Basic ALU | |
353 | ;; | |
354 | ;; Simple ALU without shift, non-predicated | |
355 | (define_insn_reservation "exynos_m1_alu" 1 ; 1-cycle latency | |
356 | (and (eq_attr "tune" "exynosm1") | |
357 | (and (not (eq_attr "predicated" "yes")) ; non-predicated forms only | |
358 | (eq_attr "type" "alu_imm, alus_imm, logic_imm, logics_imm,\ | |
359 | alu_sreg, alus_sreg, logic_reg, logics_reg,\ | |
360 | adc_imm, adcs_imm, adc_reg, adcs_reg,\ | |
94f7a25e | 361 | adr, bfm, bfx, clz, rbit, rev, csel, alu_dsp_reg,\ |
b2ca46df EM |
362 | shift_imm, shift_reg, rotate_imm, extend,\ |
363 | mov_imm, mov_reg,\ | |
364 | mvn_imm, mvn_reg,\ | |
365 | mrs, multiple"))) | |
366 | "em1_alu") ; may issue on any of em1_xa, em1_xb or em1_xc | |
367 | ||
368 | ;; Simple ALU without shift, predicated | |
369 | (define_insn_reservation "exynos_m1_alu_p" 1 ; 1-cycle latency; same type list as exynos_m1_alu minus csel | |
370 | (and (eq_attr "tune" "exynosm1") | |
371 | (and (eq_attr "predicated" "yes") ; predicated forms only | |
372 | (eq_attr "type" "alu_imm, alus_imm, logic_imm, logics_imm,\ | |
373 | alu_sreg, alus_sreg, logic_reg, logics_reg,\ | |
374 | adc_imm, adcs_imm, adc_reg, adcs_reg,\ | |
94f7a25e | 375 | adr, bfm, bfx, clz, rbit, rev, alu_dsp_reg,\ |
b2ca46df EM |
376 | shift_imm, shift_reg, rotate_imm, extend,\ |
377 | mov_imm, mov_reg,\ | |
378 | mvn_imm, mvn_reg,\ | |
379 | mrs, multiple"))) | |
380 | "em1_c") ; restricted to em1_xc, unlike the non-predicated variant | |
381 | ||
382 | ;; ALU ops with immediate shift | |
383 | ;; TODO: if the shift value is between 0 and 3, the latency is just 1 cycle; | |
384 | ;; otherwise it takes 2 cycles and the unit is blocked; | |
385 | ;; for now, assume the latter's latency and the former's units. | |
386 | (define_insn_reservation "exynos_m1_alu_shift" 2 ; 2-cycle latency, the slower case described in the TODO above | |
387 | (and (eq_attr "tune" "exynosm1") | |
388 | (eq_attr "type" "alu_ext, alus_ext,\ | |
389 | alu_shift_imm, alus_shift_imm,\ | |
390 | logic_shift_imm, logics_shift_imm,\ | |
391 | mov_shift, mvn_shift")) | |
392 | "(em1_alu)") ; a single cycle on any integer pipe, so the unit is not modelled as blocked | |
393 | ||
394 | ;; ALU ops with register controlled shift, non-predicated | |
395 | (define_insn_reservation "exynos_m1_alu_shift_reg" 2 ; 2-cycle latency | |
396 | (and (eq_attr "tune" "exynosm1") | |
397 | (and (not (eq_attr "predicated" "yes")) ; non-predicated forms only | |
398 | (eq_attr "type" "alu_shift_reg, alus_shift_reg,\ | |
399 | logic_shift_reg, logics_shift_reg,\ | |
400 | mov_shift_reg, mvn_shift_reg"))) | |
401 | "(em1_alu * 2)") ; an integer pipe is reserved in each of two consecutive cycles | |
402 | ||
403 | ;; ALU ops with register controlled shift, predicated | |
404 | (define_insn_reservation "exynos_m1_alu_shift_reg_p" 2 ; 2-cycle latency | |
405 | (and (eq_attr "tune" "exynosm1") | |
406 | (and (eq_attr "predicated" "yes") ; predicated forms only | |
407 | (eq_attr "type" "alu_shift_reg, alus_shift_reg,\ | |
408 | logic_shift_reg, logics_shift_reg,\ | |
409 | mov_shift_reg, mvn_shift_reg"))) | |
410 | "(em1_alu, em1_c)") ; any integer pipe in the first cycle, then em1_xc in the next | |
411 | ||
412 | ;; Integer multiply | |
413 | (define_insn_reservation "exynos_m1_mla" 3 ; 3-cycle latency | |
414 | (and (eq_attr "tune" "exynosm1") | |
415 | (eq_attr "mul32" "yes")) ; selected through the mul32 attribute rather than a "type" list | |
416 | "em1_c") ; em1_xc only | |
417 | ||
418 | (define_insn_reservation "exynos_m1_mlal" 4 ; 4-cycle latency | |
419 | (and (eq_attr "tune" "exynosm1") | |
420 | (eq_attr "mul64" "yes")) ; selected through the mul64 attribute rather than a "type" list | |
421 | "em1_alu, em1_c") ; any integer pipe in the first cycle, then em1_xc in the next | |
422 | ||
423 | ;; Integer divide | |
424 | ;; TODO: assume the median latency; blocks other divisions | |
425 | (define_insn_reservation "exynos_m1_div" 13 ; 13-cycle latency | |
426 | (and (eq_attr "tune" "exynosm1") | |
427 | (eq_attr "type" "udiv, sdiv")) | |
428 | "em1_c") ; only a single cycle of em1_xc is reserved here | |
429 | ||
430 | ;; Load-store execution Unit | |
431 | ;; | |
432 | ;; Loads of up to 2 words. | |
433 | (define_insn_reservation "exynos_m1_load" 4 ; 4-cycle latency | |
434 | (and (eq_attr "tune" "exynosm1") | |
435 | (eq_attr "type" "load_byte, load1, load2")) | |
436 | "em1_ld") ; one cycle on the load AGU em1_lx | |
437 | ||
438 | ;; Loads of 3 or 4 words. | |
439 | (define_insn_reservation "exynos_m1_loadm" 6 ; 6-cycle latency | |
440 | (and (eq_attr "tune" "exynosm1") | |
441 | (eq_attr "type" "load3, load4")) | |
442 | "(em1_ld * 3)") ; the load AGU is held for three consecutive cycles | |
443 | ||
444 | ;; Stores of up to 2 words. | |
445 | (define_insn_reservation "exynos_m1_store" 1 ; 1-cycle latency | |
446 | (and (eq_attr "tune" "exynosm1") | |
447 | (eq_attr "type" "store1, store2")) | |
448 | "em1_st") ; em1_sx and em1_sd together for one cycle | |
449 | ||
450 | ;; Stores of 3 or 4 words. | |
451 | (define_insn_reservation "exynos_m1_storem" 3 ; 3-cycle latency | |
452 | (and (eq_attr "tune" "exynosm1") | |
453 | (eq_attr "type" "store3, store4")) | |
454 | "(em1_st * 3)") ; both store units are held for three consecutive cycles | |
455 | ||
456 | ;; Advanced SIMD Unit | |
457 | ;; | |
458 | ;; Integer Arithmetic Instructions. | |
459 | ||
460 | (define_insn_reservation "exynos_m1_arith_simple" 1 ; 1-cycle latency; note the name has no neon_ infix, unlike its siblings | |
461 | (and (eq_attr "tune" "exynosm1") | |
462 | (eq_attr "exynos_m1_neon_type" "neon_arith_simple")) | |
463 | "em1_nmisc") ; em1_nmisc is an alias for em1_f0 | |
464 | ||
465 | (define_insn_reservation "exynos_m1_neon_arith_basic" 2 ; 2-cycle latency | |
466 | (and (eq_attr "tune" "exynosm1") | |
467 | (eq_attr "exynos_m1_neon_type" "neon_arith_basic")) | |
468 | "em1_nalu") ; either em1_f0 or em1_f1 | |
469 | ||
470 | (define_insn_reservation "exynos_m1_neon_arith_complex" 3 ; 3-cycle latency | |
471 | (and (eq_attr "tune" "exynosm1") | |
472 | (eq_attr "exynos_m1_neon_type" "neon_arith_complex")) | |
473 | "em1_nmisc") ; em1_f0 only | |
474 | ||
475 | ;; Integer Multiply Instructions. | |
476 | ||
477 | (define_insn_reservation "exynos_m1_neon_multiply" 4 ; 4-cycle latency shared by multiplies and multiply-accumulates | |
478 | (and (eq_attr "tune" "exynosm1") | |
479 | (eq_attr "exynos_m1_neon_type" | |
480 | "neon_multiply, neon_mla, neon_sat_mla_long")) | |
481 | "em1_nmisc") ; em1_f0 only | |
482 | ||
483 | ;; Integer Shift Instructions. | |
484 | ||
485 | (define_insn_reservation | |
486 | "exynos_m1_neon_shift_acc" 4 ; 4-cycle latency | |
487 | (and (eq_attr "tune" "exynosm1") | |
488 | (eq_attr "exynos_m1_neon_type" "neon_shift_acc")) | |
489 | "em1_nalu1") ; em1_nalu1 is an alias for em1_f1 | |
490 | ||
491 | (define_insn_reservation | |
492 | "exynos_m1_neon_shift_basic" 2 ; 2-cycle latency for immediate and register shifts alike | |
493 | (and (eq_attr "tune" "exynosm1") | |
494 | (eq_attr "exynos_m1_neon_type" | |
495 | "neon_shift_imm_basic, neon_shift_reg_basic")) | |
496 | "em1_nalu") ; either em1_f0 or em1_f1 | |
497 | ||
498 | (define_insn_reservation | |
499 | "exynos_m1_neon_shift_complex" 4 ; 4-cycle latency for the saturating shift classes | |
500 | (and (eq_attr "tune" "exynosm1") | |
501 | (eq_attr "exynos_m1_neon_type" | |
502 | "neon_shift_imm_complex, neon_shift_reg_complex")) | |
503 | "em1_nalu1") ; em1_f1 only | |
504 | ||
505 | ;; Floating Point Instructions. | |
506 | ||
507 | (define_insn_reservation | |
508 | "exynos_m1_neon_fp_unary" 2 ; 2-cycle latency | |
509 | (and (eq_attr "tune" "exynosm1") | |
510 | (eq_attr "exynos_m1_neon_type" "neon_fp_unary")) | |
511 | "em1_nalu") ; either em1_f0 or em1_f1 | |
512 | ||
513 | (define_insn_reservation | |
514 | "exynos_m1_neon_fp_add" 4 ; 4-cycle latency | |
515 | (and (eq_attr "tune" "exynosm1") | |
516 | (eq_attr "exynos_m1_neon_type" "neon_fp_add")) | |
517 | "em1_fadd") ; em1_fadd is an alias for em1_f1 | |
518 | ||
519 | (define_insn_reservation | |
520 | "exynos_m1_neon_fp_abd" 3 ; 3-cycle latency | |
521 | (and (eq_attr "tune" "exynosm1") | |
522 | (eq_attr "exynos_m1_neon_type" "neon_fp_abd")) | |
523 | "em1_nmisc") ; em1_f0 only | |
524 | ||
525 | (define_insn_reservation | |
526 | "exynos_m1_neon_fp_compare" 1 ; 1-cycle latency; this class also carries the neon_fp_minmax_* types | |
527 | (and (eq_attr "tune" "exynosm1") | |
528 | (eq_attr "exynos_m1_neon_type" "neon_fp_compare")) | |
529 | "em1_nmisc") ; em1_f0 only | |
530 | ||
531 | ;; TODO: the latency and throughput of reduce insns actually vary between | |
532 | ;; 3-5 and 1/4-1, respectively; the median values were picked. | |
533 | (define_insn_reservation | |
534 | "exynos_m1_neon_fp_reduc" 5 ; 5-cycle latency; applies to the min/max reductions only | |
535 | (and (eq_attr "tune" "exynosm1") | |
536 | (eq_attr "exynos_m1_neon_type" "neon_fp_reduc_minmax")) | |
537 | "(em1_nmisc * 4)") ; em1_f0 is held for four consecutive cycles | |
538 | ||
539 | (define_insn_reservation | |
540 | "exynos_m1_neon_fp_reduc_add" 10 ; 10-cycle latency | |
541 | (and (eq_attr "tune" "exynosm1") | |
542 | (eq_attr "exynos_m1_neon_type" "neon_fp_reduc_add")) | |
543 | "((em1_nalu * 2), em1_fadd)") ; either FP pipe for two cycles, then em1_f1 for one | |
544 | ||
545 | (define_insn_reservation | |
546 | "exynos_m1_neon_fp_round" 4 ; 4-cycle latency | |
547 | (and (eq_attr "tune" "exynosm1") | |
548 | (eq_attr "exynos_m1_neon_type" "neon_fp_round")) | |
549 | "em1_fcvt") ; em1_fcvt is an alias for em1_f0 | |
550 | ||
551 | (define_insn_reservation | |
552 | "exynos_m1_neon_fp_cvt" 4 ; 4-cycle latency | |
553 | (and (eq_attr "tune" "exynosm1") | |
554 | (eq_attr "exynos_m1_neon_type" "neon_fp_cvt")) | |
555 | "em1_fcvt") ; em1_f0 only | |
556 | ||
557 | (define_insn_reservation | |
558 | "exynos_m1_neon_fp_mul" 5 ; 5-cycle latency | |
559 | (and (eq_attr "tune" "exynosm1") | |
560 | (eq_attr "exynos_m1_neon_type" "neon_fp_mul")) | |
561 | "em1_fmac") ; em1_fmac is an alias for em1_f0 | |
562 | ||
563 | (define_insn_reservation | |
564 | "exynos_m1_neon_fp_mla" 6 ; 6-cycle latency | |
565 | (and (eq_attr "tune" "exynosm1") | |
566 | (eq_attr "exynos_m1_neon_type" "neon_fp_mla")) | |
567 | "em1_fmac") ; em1_f0 only | |
568 | ||
569 | (define_insn_reservation | |
570 | "exynos_m1_neon_fp_estimate" 5 ; 5-cycle latency | |
571 | (and (eq_attr "tune" "exynosm1") | |
572 | (eq_attr "exynos_m1_neon_type" "neon_fp_estimate")) | |
573 | "em1_fcvt") ; em1_f0 only | |
574 | ||
575 | (define_insn_reservation | |
576 | "exynos_m1_neon_fp_estimatex" 1 ; 1-cycle latency | |
577 | (and (eq_attr "tune" "exynosm1") | |
578 | (eq_attr "exynos_m1_neon_type" "neon_fp_estimatex")) | |
579 | "em1_nmisc") ; em1_f0 only | |
580 | ||
581 | (define_insn_reservation | |
582 | "exynos_m1_neon_fp_step" 6 ; 6-cycle latency, the same value as exynos_m1_neon_fp_mla | |
583 | (and (eq_attr "tune" "exynosm1") | |
584 | (eq_attr "exynos_m1_neon_type" "neon_fp_step")) | |
585 | "em1_fmac") ; em1_f0 only | |
586 | ||
587 | ;; Miscellaneous Instructions. | |
588 | ||
589 | (define_insn_reservation | |
590 | "exynos_m1_neon_bitops" 2 ; 2-cycle latency | |
591 | (and (eq_attr "tune" "exynosm1") | |
592 | (eq_attr "exynos_m1_neon_type" "neon_bitops")) | |
593 | "em1_nalu") ; either em1_f0 or em1_f1 | |
594 | ||
595 | (define_insn_reservation | |
596 | "exynos_m1_neon_bitops_q" 3 ; 3-cycle latency | |
597 | (and (eq_attr "tune" "exynosm1") | |
598 | (eq_attr "exynos_m1_neon_type" "neon_bitops_q")) | |
599 | "(em1_nalu, em1_nalu)") ; an FP pipe (either one) in each of two consecutive cycles | |
600 | ||
601 | (define_insn_reservation | |
602 | "exynos_m1_neon_bitins" 2 ; 2-cycle latency | |
603 | (and (eq_attr "tune" "exynosm1") | |
604 | (eq_attr "exynos_m1_neon_type" "neon_bitins")) | |
605 | "em1_nalu1") ; em1_f1 only | |
606 | ||
607 | ;; TODO: table lookups are more complicated than this single reservation models. | |
608 | (define_insn_reservation | |
609 | "exynos_m1_neon_tbl" 2 ; 2-cycle latency for all of neon_tbl1 to neon_tbl4 | |
610 | (and (eq_attr "tune" "exynosm1") | |
611 | (eq_attr "exynos_m1_neon_type" "neon_tbl")) | |
612 | "em1_nalu1") ; em1_f1 only | |
613 | ||
614 | (define_insn_reservation | |
615 | "exynos_m1_neon_from_gp" 4 ; 4-cycle latency; the class also carries f_mcr and f_mcrr | |
616 | (and (eq_attr "tune" "exynosm1") | |
617 | (eq_attr "exynos_m1_neon_type" "neon_from_gp")) | |
618 | "em1_st") ; reserves the store units em1_sx and em1_sd, and no FP pipe | |
619 | ||
620 | (define_insn_reservation | |
621 | "exynos_m1_neon_to_gp" 9 ; 9-cycle latency; the class also carries f_mrc and f_mrrc | |
622 | (and (eq_attr "tune" "exynosm1") | |
623 | (eq_attr "exynos_m1_neon_type" "neon_to_gp")) | |
624 | "em1_lfst") ; em1_f1 and the load AGU em1_lx in the same cycle | |
625 | ||
626 | ;; Load Instructions. | |
627 | ||
628 | (define_insn_reservation | |
629 | "exynos_m1_neon_load" 5 ; 5-cycle latency | |
630 | (and (eq_attr "tune" "exynosm1") | |
631 | (eq_attr "type" "f_loads, f_loadd, neon_ldp")) ; tests "type" directly, not exynos_m1_neon_type | |
632 | "em1_ld") ; one cycle on the load AGU em1_lx | |
633 | ||
634 | (define_insn_reservation | |
635 | "exynos_m1_neon_load_q" 6 ; 6-cycle latency | |
636 | (and (eq_attr "tune" "exynosm1") | |
637 | (eq_attr "type" "neon_ldp_q")) ; tests "type" directly, not exynos_m1_neon_type | |
638 | "(em1_ld, em1_ld)") ; the load AGU in each of two consecutive cycles | |
639 | ||
640 | (define_insn_reservation | |
641 | "exynos_m1_neon_load1_1" 6 ; 6-cycle latency; also used for the neon_load1_all class | |
642 | (and (eq_attr "tune" "exynosm1") | |
643 | (eq_attr "exynos_m1_neon_type" "neon_load1_1, neon_load1_all")) | |
644 | "em1_ld") ; one cycle on the load AGU | |
645 | ||
646 | (define_insn_reservation | |
647 | "exynos_m1_neon_load1_2" 6 ; 6-cycle latency | |
648 | (and (eq_attr "tune" "exynosm1") | |
649 | (eq_attr "exynos_m1_neon_type" "neon_load1_2")) | |
650 | "(em1_ld * 2)") ; load AGU held for two consecutive cycles | |
651 | ||
652 | (define_insn_reservation | |
653 | "exynos_m1_neon_load1_3" 7 ; 7-cycle latency | |
654 | (and (eq_attr "tune" "exynosm1") | |
655 | (eq_attr "exynos_m1_neon_type" "neon_load1_3")) | |
656 | "(em1_ld * 3)") ; load AGU held for three consecutive cycles | |
657 | ||
658 | (define_insn_reservation | |
659 | "exynos_m1_neon_load1_4" 8 ; 8-cycle latency | |
660 | (and (eq_attr "tune" "exynosm1") | |
661 | (eq_attr "exynos_m1_neon_type" "neon_load1_4")) | |
662 | "(em1_ld * 4)") ; load AGU held for four consecutive cycles | |
663 | ||
664 | (define_insn_reservation | |
665 | "exynos_m1_neon_load1_one" 7 ; 7-cycle latency | |
666 | (and (eq_attr "tune" "exynosm1") | |
667 | (eq_attr "exynos_m1_neon_type" "neon_load1_one")) | |
668 | "((em1_ld * 2), em1_nalu)") ; two cycles on the load AGU, then one on either FP pipe | |
669 | ||
670 | (define_insn_reservation | |
671 | "exynos_m1_neon_load2_2" 10 ; 10-cycle latency | |
672 | (and (eq_attr "tune" "exynosm1") | |
673 | (eq_attr "exynos_m1_neon_type" "neon_load2_2")) | |
674 | "(em1_ld * 5)") ; load AGU held for five consecutive cycles | |
675 | ||
676 | (define_insn_reservation | |
677 | "exynos_m1_neon_load2_one" 7 ; 7-cycle latency | |
678 | (and (eq_attr "tune" "exynosm1") | |
679 | (eq_attr "exynos_m1_neon_type" "neon_load2_one")) | |
680 | "((em1_ld * 2), (em1_nalu * 2))") ; two cycles on the load AGU, then two on the FP pipes | |
681 | ||
682 | (define_insn_reservation | |
683 | "exynos_m1_neon_load2_all" 6 ; 6-cycle latency | |
684 | (and (eq_attr "tune" "exynosm1") | |
685 | (eq_attr "exynos_m1_neon_type" "neon_load2_all")) | |
686 | "(em1_ld * 2)") ; load AGU held for two consecutive cycles | |
687 | ||
688 | (define_insn_reservation | |
689 | "exynos_m1_neon_load3_3" 12 ; 12-cycle latency | |
690 | (and (eq_attr "tune" "exynosm1") | |
691 | (eq_attr "exynos_m1_neon_type" "neon_load3_3")) | |
692 | "(em1_ld * 6)") ; load AGU held for six consecutive cycles | |
693 | ||
694 | (define_insn_reservation | |
695 | "exynos_m1_neon_load3_one" 9 ; 9-cycle latency | |
696 | (and (eq_attr "tune" "exynosm1") | |
697 | (eq_attr "exynos_m1_neon_type" "neon_load3_one")) | |
698 | "((em1_ld * 4), (em1_nalu * 3))") ; four cycles on the load AGU, then three on the FP pipes | |
699 | ||
700 | (define_insn_reservation | |
701 | "exynos_m1_neon_load3_all" 7 ; 7-cycle latency | |
702 | (and (eq_attr "tune" "exynosm1") | |
703 | (eq_attr "exynos_m1_neon_type" "neon_load3_all")) | |
704 | "(em1_ld * 3)") ; load AGU held for three consecutive cycles | |
705 | ||
706 | (define_insn_reservation | |
707 | "exynos_m1_neon_load4_4" 14 ; 14-cycle latency | |
708 | (and (eq_attr "tune" "exynosm1") | |
709 | (eq_attr "exynos_m1_neon_type" "neon_load4_4")) | |
710 | "(em1_ld * 7)") ; load AGU held for seven consecutive cycles | |
711 | ||
712 | (define_insn_reservation | |
713 | "exynos_m1_neon_load4_one" 9 ; 9-cycle latency | |
714 | (and (eq_attr "tune" "exynosm1") | |
715 | (eq_attr "exynos_m1_neon_type" "neon_load4_one")) | |
716 | "((em1_ld * 4), (em1_nalu * 4))") ; four cycles on the load AGU, then four on the FP pipes | |
717 | ||
718 | (define_insn_reservation | |
719 | "exynos_m1_neon_load4_all" 8 ; 8-cycle latency | |
720 | (and (eq_attr "tune" "exynosm1") | |
721 | (eq_attr "exynos_m1_neon_type" "neon_load4_all")) | |
722 | "(em1_ld * 4)") ; load AGU held for four consecutive cycles | |
723 | ||
724 | ;; Store Instructions. | |
725 | ||
726 | (define_insn_reservation | |
727 | "exynos_m1_neon_store" 1 ; 1-cycle latency; tests "type" directly, not exynos_m1_neon_type | |
728 | (and (eq_attr "tune" "exynosm1") | |
25cc2199 EM |
729 | (eq_attr "type" "f_stores, f_stored, neon_stp")) |
730 | "em1_sfst") ; em1_f1 plus both store units in the same cycle | |
731 | ||
732 | (define_insn_reservation | |
733 | "exynos_m1_neon_store_q" 3 ; 3-cycle latency | |
734 | (and (eq_attr "tune" "exynosm1") | |
735 | (eq_attr "type" "neon_stp_q")) ; tests "type" directly, not exynos_m1_neon_type | |
736 | "(em1_sfst * 2)") ; em1_f1 plus both store units for two consecutive cycles | |
b2ca46df EM |
737 | |
738 | (define_insn_reservation | |
739 | "exynos_m1_neon_store1_1" 1 ; 1-cycle latency | |
740 | (and (eq_attr "tune" "exynosm1") | |
741 | (eq_attr "exynos_m1_neon_type" "neon_store1_1")) | |
742 | "em1_sfst") ; em1_f1 plus both store units for one cycle | |
743 | ||
744 | (define_insn_reservation | |
745 | "exynos_m1_neon_store1_2" 2 ; 2-cycle latency | |
746 | (and (eq_attr "tune" "exynosm1") | |
747 | (eq_attr "exynos_m1_neon_type" "neon_store1_2")) | |
748 | "(em1_sfst * 2)") ; em1_f1 plus both store units for two consecutive cycles | |
749 | ||
750 | (define_insn_reservation | |
751 | "exynos_m1_neon_store1_3" 3 ; 3-cycle latency | |
752 | (and (eq_attr "tune" "exynosm1") | |
753 | (eq_attr "exynos_m1_neon_type" "neon_store1_3")) | |
754 | "(em1_sfst * 3)") ; em1_f1 plus both store units for three consecutive cycles | |
755 | ||
756 | (define_insn_reservation | |
757 | "exynos_m1_neon_store1_4" 4 ; 4-cycle latency | |
758 | (and (eq_attr "tune" "exynosm1") | |
759 | (eq_attr "exynos_m1_neon_type" "neon_store1_4")) | |
760 | "(em1_sfst * 4)") ; em1_f1 plus both store units for four consecutive cycles | |
761 | ||
762 | (define_insn_reservation | |
763 | "exynos_m1_neon_store1_one" 7 ; 7-cycle latency | |
764 | (and (eq_attr "tune" "exynosm1") | |
765 | (eq_attr "exynos_m1_neon_type" "neon_store1_one")) | |
25cc2199 | 766 | "em1_sfst") ; em1_f1 plus both store units for a single cycle |
b2ca46df EM |
767 | |
768 | (define_insn_reservation | |
769 | "exynos_m1_neon_store2" 7 ; 7-cycle latency for both the multi-register and one-lane classes | |
770 | (and (eq_attr "tune" "exynosm1") | |
771 | (eq_attr "exynos_m1_neon_type" "neon_store2_2, neon_store2_one")) | |
772 | "em1_sfst, em1_fst") ; em1_f1 plus the store units in the first cycle, then em1_f1 alone | |
773 | ||
774 | (define_insn_reservation | |
775 | "exynos_m1_neon_store3" 16 ; 16-cycle latency for both the multi-register and one-lane classes | |
776 | (and (eq_attr "tune" "exynosm1") | |
777 | (eq_attr "exynos_m1_neon_type" "neon_store3_3, neon_store3_one")) | |
778 | "((em1_sfst * 3), (em1_fst * 2), em1_nalu)") ; 3 cycles of em1_sfst, 2 of em1_f1, then 1 on either FP pipe | |
779 | ||
780 | (define_insn_reservation | |
781 | "exynos_m1_neon_store4" 17 ; 17-cycle latency for both the multi-register and one-lane classes | |
782 | (and (eq_attr "tune" "exynosm1") | |
783 | (eq_attr "exynos_m1_neon_type" "neon_store4_4, neon_store4_one")) | |
784 | "((em1_sfst * 4), (em1_fst * 2), em1_nalu)") ; 4 cycles of em1_sfst, 2 of em1_f1, then 1 on either FP pipe | |
785 | ||
786 | ;; Floating-Point Operations. | |
787 | ||
788 | (define_insn_reservation "exynos_m1_fp_const" 2 ; 2-cycle latency | |
789 | (and (eq_attr "tune" "exynosm1") | |
790 | (eq_attr "type" "fconsts, fconstd")) | |
791 | "em1_nalu") ; either em1_f0 or em1_f1 | |
792 | ||
793 | (define_insn_reservation "exynos_m1_fp_add" 4 ; 4-cycle latency, the same value as exynos_m1_neon_fp_add | |
794 | (and (eq_attr "tune" "exynosm1") | |
795 | (eq_attr "type" "fadds, faddd")) | |
796 | "em1_fadd") ; em1_f1 only | |
797 | ||
798 | (define_insn_reservation "exynos_m1_fp_mul" 5 ; 5-cycle latency, the same value as exynos_m1_neon_fp_mul | |
799 | (and (eq_attr "tune" "exynosm1") | |
800 | (eq_attr "type" "fmuls, fmuld")) | |
801 | "em1_fmac") ; em1_f0 only | |
802 | ||
803 | (define_insn_reservation "exynos_m1_fp_mac" 6 ; 6-cycle latency for multiply-accumulate and fused forms | |
804 | (and (eq_attr "tune" "exynosm1") | |
805 | (eq_attr "type" "fmacs, ffmas, fmacd, ffmad")) | |
806 | "em1_fmac") ; em1_f0 only | |
807 | ||
808 | (define_insn_reservation "exynos_m1_fp_cvt" 4 ; 4-cycle latency | |
809 | (and (eq_attr "tune" "exynosm1") | |
810 | (eq_attr "type" "f_cvt, f_rints, f_rintd")) | |
811 | "em1_fcvt") ; em1_f0 only | |
812 | ||
813 | (define_insn_reservation "exynos_m1_fp_cvt_i" 13 ; 13-cycle latency | |
814 | (and (eq_attr "tune" "exynosm1") | |
815 | (eq_attr "type" "f_cvtf2i")) | |
816 | "(em1_fcvt, em1_lfst)") ; em1_f0 first, then em1_f1 with the load AGU, as exynos_m1_neon_to_gp does | |
817 | ||
;; Insns of "type" f_cvti2f when "tune" is exynosm1: default latency 9.
;; em1_st is reserved on the first cycle and em1_fcvt on the next.
818 | (define_insn_reservation "exynos_m1_i_cvt_fp" 9 | |
819 | (and (eq_attr "tune" "exynosm1") | |
820 | (eq_attr "type" "f_cvti2f")) | |
821 | "(em1_st, em1_fcvt)") | |
822 | ||
;; Insns of "type" fcmps or fcmpd when "tune" is exynosm1:
;; default latency 4, reserving em1_nmisc for one cycle.
823 | (define_insn_reservation "exynos_m1_fp_cmp" 4 | |
824 | (and (eq_attr "tune" "exynosm1") | |
825 | (eq_attr "type" "fcmps, fcmpd")) | |
826 | "em1_nmisc") | |
827 | ||
c297d256 EM |
;; Insns of "type" fccmps or fccmpd when "tune" is exynosm1: default
;; latency 7.  em1_st is reserved on the first cycle and em1_nmisc on
;; the next.
828 | (define_insn_reservation "exynos_m1_fp_ccmp" 7 |
829 | (and (eq_attr "tune" "exynosm1") | |
830 | (eq_attr "type" "fccmps, fccmpd")) | |
831 | "(em1_st, em1_nmisc)") | |
832 | ||
b2ca46df EM |
;; Insns of "type" fcsel when "tune" is exynosm1: default latency 4.
;; "+" reserves em1_st and em1_nalu0 together in the same cycle.
833 | (define_insn_reservation "exynos_m1_fp_sel" 4 |
834 | (and (eq_attr "tune" "exynosm1") | |
835 | (eq_attr "type" "fcsel")) | |
836 | "(em1_st + em1_nalu0)") | |
837 | ||
;; Insns of "type" ffariths or ffarithd when "tune" is exynosm1:
;; default latency 2, reserving em1_nalu for one cycle.
838 | (define_insn_reservation "exynos_m1_fp_arith" 2 | |
839 | (and (eq_attr "tune" "exynosm1") | |
840 | (eq_attr "type" "ffariths, ffarithd")) | |
841 | "em1_nalu") | |
842 | ||
;; Insns of "type" fmov when "tune" is exynosm1:
;; default latency 2, reserving em1_nalu for one cycle.
843 | (define_insn_reservation "exynos_m1_fp_cpy" 2 | |
844 | (and (eq_attr "tune" "exynosm1") | |
845 | (eq_attr "type" "fmov")) | |
846 | "em1_nalu") | |
847 | ||
;; Single-precision divide and square root, covering both the scalar
;; types (fdivs, fsqrts) and the neon_fp_div_s* / neon_fp_sqrt_s* types,
;; when "tune" is exynosm1.  Default latency is 15; em1_fvar is reserved
;; for 9 consecutive cycles.
848 | (define_insn_reservation "exynos_m1_fp_divs" 15 | |
849 | (and (eq_attr "tune" "exynosm1") | |
850 | (eq_attr "type" "fdivs, neon_fp_div_s, neon_fp_div_s_q,\ | |
851 | fsqrts, neon_fp_sqrt_s, neon_fp_sqrt_s_q")) | |
852 | "(em1_fvar * 9)") | |
853 | ||
;; Double-precision divide and square root, covering both the scalar
;; types (fdivd, fsqrtd) and the neon_fp_div_d* / neon_fp_sqrt_d* types,
;; when "tune" is exynosm1.  Default latency is 22; em1_fvar is reserved
;; for 9 consecutive cycles.
;; NOTE(review): the 9-cycle reservation equals the one used by
;; exynos_m1_fp_divs even though the latency here is higher (22 vs 15) --
;; confirm this occupancy is intended.
854 | (define_insn_reservation "exynos_m1_fp_divd" 22 | |
855 | (and (eq_attr "tune" "exynosm1") | |
856 | (eq_attr "type" "fdivd, neon_fp_div_d, neon_fp_div_d_q,\ | |
857 | fsqrtd, neon_fp_sqrt_d, neon_fp_sqrt_d_q")) | |
858 | "(em1_fvar * 9)") | |
859 | ||
;; Insns of "type" f_minmaxs or f_minmaxd when "tune" is exynosm1:
;; default latency 2, reserving em1_nmisc for 2 consecutive cycles.
860 | (define_insn_reservation "exynos_m1_fp_minmax" 2 | |
861 | (and (eq_attr "tune" "exynosm1") | |
862 | (eq_attr "type" "f_minmaxs, f_minmaxd")) | |
863 | "(em1_nmisc * 2)") | |
864 | ||
865 | ;; Crypto Operations. | |
866 | ||
;; Insns of "type" crypto_aese, crypto_aesmc, crypto_sha1_xor,
;; crypto_sha1_fast or crypto_sha256_fast when "tune" is exynosm1:
;; default latency 2, reserving em1_ncrypt for one cycle.
867 | (define_insn_reservation "exynos_m1_crypto_simple" 2 | |
868 | (and (eq_attr "tune" "exynosm1") | |
869 | (eq_attr "type" "crypto_aese, crypto_aesmc,\ | |
870 | crypto_sha1_xor, crypto_sha1_fast, crypto_sha256_fast")) | |
871 | "em1_ncrypt") | |
872 | ||
;; Insns of "type" crypto_sha1_slow or crypto_sha256_slow when "tune" is
;; exynosm1: default latency 6, reserving em1_ncrypt for one cycle.
873 | (define_insn_reservation "exynos_m1_crypto_complex" 6 | |
874 | (and (eq_attr "tune" "exynosm1") | |
875 | (eq_attr "type" "crypto_sha1_slow, crypto_sha256_slow")) | |
876 | "em1_ncrypt") | |
877 | ||
;; Insns of "type" neon_mul_b_long, neon_mul_h_long or neon_mul_s_long
;; when "tune" is exynosm1: default latency 2, reserving em1_ncrypt for
;; one cycle.  Note the selector is the generic "type" attribute, and the
;; reservation sits in the crypto section under a "poly" name.
878 | (define_insn_reservation "exynos_m1_crypto_poly" 2 | |
879 | (and (eq_attr "tune" "exynosm1") | |
880 | (eq_attr "type" "neon_mul_b_long, neon_mul_h_long, neon_mul_s_long")) | |
881 | "em1_ncrypt") | |
882 | ||
;; Insns of "type" neon_mul_d_long when "tune" is exynosm1:
;; default latency 4, reserving em1_ncrypt for one cycle.
883 | (define_insn_reservation "exynos_m1_crypto_polyl" 4 | |
884 | (and (eq_attr "tune" "exynosm1") | |
885 | (eq_attr "type" "neon_mul_d_long")) | |
886 | "em1_ncrypt") | |
887 | ||
;; Insns of "type" crc when "tune" is exynosm1:
;; default latency 2, reserving em1_c for one cycle.
888 | (define_insn_reservation "exynos_m1_crc" 2 | |
889 | (and (eq_attr "tune" "exynosm1") | |
890 | (eq_attr "type" "crc")) | |
891 | "em1_c") | |
892 | ||
893 | ;; Simple execution unit bypasses | |
894 | ||
895 | ;; Pre-decrement and post-increment addressing modes update the register quickly. | |
896 | ;; TODO: figure out how to tell the addressing mode register from the loaded one. | |
25cc2199 EM |
;; Latency 1 from any reservation whose name matches exynos_m1_store* or
;; exynos_m1_neon_store* to any reservation matching the store or load
;; globs listed below.  The names are filename-style globs over
;; define_insn_reservation names.
897 | (define_bypass 1 "exynos_m1_store*, exynos_m1_neon_store*" |
898 | "exynos_m1_store*, exynos_m1_neon_store*, | |
899 | exynos_m1_load*, exynos_m1_neon_load*") | |
b2ca46df EM |
900 | |
901 | ;; MLAs can feed other MLAs quickly. | |
;; Latency 1 between any two reservations whose names match
;; exynos_m1_mla* (producer and consumer use the same glob).
902 | (define_bypass 1 "exynos_m1_mla*" "exynos_m1_mla*") | |
903 | ||
904 | ;; Insns in FMAC or FADD can feed other such insns quickly. | |
;; Results of exynos_m1_fp_mul reach exynos_m1_fp_add, exynos_m1_fp_mul
;; and exynos_m1_fp_mac consumers with latency 4, one cycle less than the
;; default latency of 5 declared by the exynos_m1_fp_mul reservation.
905 | (define_bypass 4 "exynos_m1_fp_mul" | |
906 | "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac") | |
;; Results of exynos_m1_fp_mac reach exynos_m1_fp_add, exynos_m1_fp_mul
;; and exynos_m1_fp_mac consumers with latency 5, one cycle less than the
;; default latency of 6 declared by the exynos_m1_fp_mac reservation.
907 | (define_bypass 5 "exynos_m1_fp_mac" | |
908 | "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac") | |
;; NEON counterpart of the scalar FP multiply bypass: results of
;; exynos_m1_neon_fp_mul reach the NEON FP add, mul, mla and step
;; reservations with latency 4.  The producer reservation is defined
;; outside this excerpt, so its default latency is not visible here.
909 | (define_bypass 4 "exynos_m1_neon_fp_mul" | |
910 | "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\ | |
911 | exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step") | |
;; Results of exynos_m1_neon_fp_mla or exynos_m1_neon_fp_step reach the
;; NEON FP add, mul, mla and step reservations with latency 5.  The
;; producer reservations are defined outside this excerpt.
912 | (define_bypass 5 "exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step" | |
913 | "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\ | |
914 | exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step") | |
b2ca46df EM |
;; Results of exynos_m1_fp_add reach exynos_m1_fp_add, exynos_m1_fp_mul
;; and exynos_m1_fp_mac consumers with latency 3, one cycle less than the
;; default latency of 4 declared by the exynos_m1_fp_add reservation.
915 | (define_bypass 3 "exynos_m1_fp_add" |
916 | "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac") | |
;; NEON counterpart of the scalar FP add bypass: results of
;; exynos_m1_neon_fp_add reach the NEON FP add, mul, mla and step
;; reservations with latency 3.  The producer reservation is defined
;; outside this excerpt.
917 | (define_bypass 3 "exynos_m1_neon_fp_add" | |
918 | "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\ | |
919 | exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step") | |
920 | ||
921 | ;; Insns in NALU can feed other such insns quickly. | |
;; Results of exynos_m1_fp_const, exynos_m1_fp_arith and exynos_m1_fp_cpy
;; (each declared with default latency 2) reach those same three
;; reservations plus exynos_m1_fp_sel with latency 1.
922 | (define_bypass 1 "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy" | |
923 | "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy,\ | |
924 | exynos_m1_fp_sel") | |
;; Results of exynos_m1_fp_sel (declared with default latency 4) reach
;; exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy and
;; exynos_m1_fp_sel with latency 3.
925 | (define_bypass 3 "exynos_m1_fp_sel" | |
926 | "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy,\ | |
927 | exynos_m1_fp_sel") | |
;; Latency-1 bypass between NEON reservations.  Producers: arith_basic,
;; shift_basic, bitops, bitins and tbl.  Consumers: the same set plus
;; shift_acc and shift_complex.  On the consumer side the bitops entry is
;; the glob exynos_m1_neon_bitops*, whereas the producer side names
;; exynos_m1_neon_bitops exactly.
;; NOTE(review): the producer reservations are defined outside this
;; excerpt; confirm whether the exact (non-glob) producer name is meant
;; to exclude other bitops* reservations.
928 | (define_bypass 1 "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\ | |
929 | exynos_m1_neon_bitops, exynos_m1_neon_bitins,\ | |
930 | exynos_m1_neon_tbl" | |
931 | "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\ | |
932 | exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex,\ | |
933 | exynos_m1_neon_bitops*, exynos_m1_neon_bitins,\ | |
934 | exynos_m1_neon_tbl") | |
;; Results of exynos_m1_neon_shift_acc and exynos_m1_neon_shift_complex
;; reach the consumer list below with latency 3: arith_basic,
;; shift_basic, shift_acc, shift_complex, bitops*, bitins and tbl.
;; The producer reservations are defined outside this excerpt.
935 | (define_bypass 3 "exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex" | |
936 | "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\ | |
937 | exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex,\ | |
938 | exynos_m1_neon_bitops*, exynos_m1_neon_bitins,\ | |
939 | exynos_m1_neon_tbl") | |
;; Latency 1 when the result of an exynos_m1_neon_fp_unary insn feeds
;; another insn in the same reservation.
940 | (define_bypass 1 "exynos_m1_neon_fp_unary" "exynos_m1_neon_fp_unary") | |
941 | ||
942 | ;; Insns in NCRYPT can feed other such insns quickly. | |
;; Results of exynos_m1_crypto_simple and exynos_m1_crypto_poly (each
;; declared with default latency 2) reach exynos_m1_crypto_simple,
;; exynos_m1_crypto_complex and any reservation matching
;; exynos_m1_crypto_poly* with latency 1.
943 | (define_bypass 1 "exynos_m1_crypto_simple, exynos_m1_crypto_poly" | |
944 | "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\ | |
945 | exynos_m1_crypto_poly*") | |
;; Results of exynos_m1_crypto_polyl (declared with default latency 4)
;; reach exynos_m1_crypto_simple, exynos_m1_crypto_complex and any
;; reservation matching exynos_m1_crypto_poly* with latency 3.
946 | (define_bypass 3 "exynos_m1_crypto_polyl" | |
947 | "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\ | |
948 | exynos_m1_crypto_poly*") | |
;; Results of exynos_m1_crypto_complex (declared with default latency 6)
;; reach exynos_m1_crypto_simple, exynos_m1_crypto_complex and any
;; reservation matching exynos_m1_crypto_poly* with latency 5.
949 | (define_bypass 5 "exynos_m1_crypto_complex" | |
950 | "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\ | |
951 | exynos_m1_crypto_poly*") | |
952 | ||
25cc2199 EM |
953 | ;; AES{D,E}/AESMC pairs can feed each other instantly. |
;; Guarded bypass: latency 0 between two exynos_m1_crypto_simple insns,
;; applied only when the guard function aarch_crypto_can_dual_issue
;; (defined outside this file excerpt) accepts the producer/consumer
;; pair.  An unguarded latency-1 bypass for the same reservation pair is
;; declared separately in this file.
954 | (define_bypass 0 "exynos_m1_crypto_simple" | |
955 | "exynos_m1_crypto_simple" | |
956 | "aarch_crypto_can_dual_issue") | |
957 | ||
b2ca46df EM |
958 | ;; Predicted branches take no time, but mispredicted ones take forever anyway. |
;; Latency 1 from every reservation whose name matches the glob
;; exynos_m1_* to the exynos_m1_call and exynos_m1_branch reservations
;; (both defined outside this excerpt).
959 | (define_bypass 1 "exynos_m1_*" | |
960 | "exynos_m1_call, exynos_m1_branch") |