]>
Commit | Line | Data |
---|---|---|
b2ca46df | 1 | ;; Samsung Exynos M1 pipeline description |
818ab71a | 2 | ;; Copyright (C) 2014-2016 Free Software Foundation, Inc. |
b2ca46df EM |
3 | ;; |
4 | ;; This file is part of GCC. | |
5 | ;; | |
6 | ;; GCC is free software; you can redistribute it and/or modify it | |
7 | ;; under the terms of the GNU General Public License as published by | |
8 | ;; the Free Software Foundation; either version 3, or (at your option) | |
9 | ;; any later version. | |
10 | ;; | |
11 | ;; GCC is distributed in the hope that it will be useful, but | |
12 | ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | ;; General Public License for more details. | |
15 | ;; | |
16 | ;; You should have received a copy of the GNU General Public License | |
17 | ;; along with GCC; see the file COPYING3. If not see | |
18 | ;; <http://www.gnu.org/licenses/>. | |
19 | ||
;; Exynos M1 scheduling class for Advanced SIMD and FP instructions.
;;
;; The cond below folds the generic "type" attribute into a smaller set of
;; classes that the insn reservations in this file match on.  Any type that
;; is not listed in one of the arms falls through to "unknown".
;;
;; The values neon_mla_q, neon_mla_long, neon_shift_reg_basic_q,
;; neon_shift_reg_complex_q, neon_fp_minmax, neon_fp_mul_q, neon_fp_mla_q
;; and neon_move are declared but are never produced by the cond below.
;;
;; Every multi-line "type" string uses a backslash-newline continuation so
;; that no raw newline ends up inside the string.
(define_attr "exynos_m1_neon_type"
  "neon_arith_simple, neon_arith_basic, neon_arith_complex,
   neon_multiply, neon_mla, neon_mla_q, neon_mla_long, neon_sat_mla_long,
   neon_shift_acc, neon_shift_imm_basic, neon_shift_imm_complex,
   neon_shift_reg_basic, neon_shift_reg_basic_q,
   neon_shift_reg_complex, neon_shift_reg_complex_q,
   neon_fp_unary, neon_fp_add, neon_fp_abd, neon_fp_compare,
   neon_fp_reduc_minmax, neon_fp_reduc_add, neon_fp_round, neon_fp_cvt,
   neon_fp_minmax, neon_fp_mul, neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q,
   neon_fp_estimate, neon_fp_estimatex, neon_fp_step,
   neon_bitops, neon_bitops_q, neon_bitins,
   neon_to_gp, neon_from_gp, neon_move, neon_tbl,
   neon_load1_1, neon_load1_2, neon_load1_3, neon_load1_4,
   neon_load1_one, neon_load1_all,
   neon_load2_2, neon_load2_one, neon_load2_all,
   neon_load3_3, neon_load3_one, neon_load3_all,
   neon_load4_4, neon_load4_one, neon_load4_all,
   neon_store,
   neon_store1_1, neon_store1_2, neon_store1_3, neon_store1_4, neon_store1_one,
   neon_store2_2, neon_store2_one,
   neon_store3_3, neon_store3_one,
   neon_store4_4, neon_store4_one,
   unknown"
  (cond [
    ;; Integer arithmetic.
    (eq_attr "type" "neon_abd, neon_abd_q, neon_abd_long,\
                     neon_abs, neon_abs_q,\
                     neon_minmax, neon_minmax_q")
      (const_string "neon_arith_simple")

    (eq_attr "type" "neon_add, neon_add_q, neon_add_long,\
                     neon_neg, neon_neg_q,\
                     neon_sub, neon_sub_q, neon_sub_long, neon_sub_widen,\
                     neon_logic, neon_logic_q, neon_tst, neon_tst_q,\
                     neon_compare_zero, neon_compare_zero_q")
      (const_string "neon_arith_basic")

    (eq_attr "type" "neon_add_widen, neon_arith_acc, neon_arith_acc_q,\
                     neon_reduc_add, neon_reduc_add_q,\
                     neon_reduc_add_acc, neon_reduc_add_acc_q,\
                     neon_reduc_add_long, neon_add_halve_narrow_q,\
                     neon_add_halve, neon_add_halve_q,\
                     neon_sub_halve, neon_sub_halve_q, neon_qabs,\
                     neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
                     neon_qneg_q, neon_qsub, neon_qsub_q,\
                     neon_sub_halve_narrow_q,\
                     neon_compare, neon_compare_q,\
                     neon_reduc_minmax, neon_reduc_minmax_q")
      (const_string "neon_arith_complex")

    ;; Integer multiply and multiply-accumulate.
    (eq_attr "type" "neon_mul_b, neon_mul_b_q, neon_mul_h, neon_mul_h_q,\
                     neon_mul_s, neon_mul_s_q,\
                     neon_mul_h_scalar, neon_mul_h_scalar_q,\
                     neon_mul_s_scalar, neon_mul_s_scalar_q,\
                     neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
                     neon_sat_mul_b, neon_sat_mul_b_q,\
                     neon_sat_mul_h, neon_sat_mul_h_q,\
                     neon_sat_mul_s, neon_sat_mul_s_q,\
                     neon_sat_mul_h_scalar, neon_sat_mul_h_scalar_q,\
                     neon_sat_mul_s_scalar, neon_sat_mul_s_scalar_q,\
                     neon_sat_mul_b_long, neon_sat_mul_h_long,\
                     neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
                     neon_sat_mul_s_scalar_long")
      (const_string "neon_multiply")

    (eq_attr "type" "neon_mla_b, neon_mla_h, neon_mla_s,\
                     neon_mla_h_scalar, neon_mla_s_scalar,\
                     neon_mla_b_long, neon_mla_h_long,\
                     neon_mla_s_long,\
                     neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
                     neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
                     neon_mla_h_scalar_q, neon_mla_s_scalar_q")
      (const_string "neon_mla")

    (eq_attr "type" "neon_sat_mla_b_long, neon_sat_mla_h_long,\
                     neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
                     neon_sat_mla_s_scalar_long")
      (const_string "neon_sat_mla_long")

    ;; Integer shifts.
    (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
      (const_string "neon_shift_acc")

    (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
                     neon_shift_imm_narrow_q, neon_shift_imm_long")
      (const_string "neon_shift_imm_basic")

    (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
                     neon_sat_shift_imm_narrow_q")
      (const_string "neon_shift_imm_complex")

    (eq_attr "type" "neon_shift_reg, neon_shift_reg_q")
      (const_string "neon_shift_reg_basic")

    (eq_attr "type" "neon_sat_shift_reg, neon_sat_shift_reg_q")
      (const_string "neon_shift_reg_complex")

    ;; Floating point.
    (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
                     neon_fp_abs_s, neon_fp_abs_s_q,\
                     neon_fp_neg_d, neon_fp_neg_d_q,\
                     neon_fp_abs_d, neon_fp_abs_d_q")
      (const_string "neon_fp_unary")

    (eq_attr "type" "neon_fp_addsub_s, neon_fp_addsub_s_q,\
                     neon_fp_addsub_d, neon_fp_addsub_d_q")
      (const_string "neon_fp_add")

    (eq_attr "type" "neon_fp_abd_s, neon_fp_abd_s_q,\
                     neon_fp_abd_d, neon_fp_abd_d_q")
      (const_string "neon_fp_abd")

    ;; Note that the FP min/max types share the compare class.
    (eq_attr "type" "neon_fp_compare_s, neon_fp_compare_s_q,\
                     neon_fp_compare_d, neon_fp_compare_d_q,\
                     neon_fp_minmax_s, neon_fp_minmax_s_q,\
                     neon_fp_minmax_d, neon_fp_minmax_d_q")
      (const_string "neon_fp_compare")

    (eq_attr "type" "neon_fp_reduc_minmax_s, neon_fp_reduc_minmax_s_q,\
                     neon_fp_reduc_minmax_d, neon_fp_reduc_minmax_d_q")
      (const_string "neon_fp_reduc_minmax")

    (eq_attr "type" "neon_fp_reduc_add_s, neon_fp_reduc_add_s_q,\
                     neon_fp_reduc_add_d, neon_fp_reduc_add_d_q")
      (const_string "neon_fp_reduc_add")

    (eq_attr "type" "neon_fp_round_s, neon_fp_round_s_q,\
                     neon_fp_round_d, neon_fp_round_d_q")
      (const_string "neon_fp_round")

    (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h,\
                     neon_fp_to_int_s, neon_fp_to_int_s_q,\
                     neon_fp_to_int_d_q, neon_fp_to_int_d,\
                     neon_int_to_fp_s, neon_int_to_fp_s_q,\
                     neon_int_to_fp_d, neon_int_to_fp_d_q")
      (const_string "neon_fp_cvt")

    (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_q,\
                     neon_fp_mul_s_scalar, neon_fp_mul_s_scalar_q,\
                     neon_fp_mul_d, neon_fp_mul_d_q,\
                     neon_fp_mul_d_scalar_q")
      (const_string "neon_fp_mul")

    (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_q,\
                     neon_fp_mla_s_scalar,neon_fp_mla_s_scalar_q,\
                     neon_fp_mla_d, neon_fp_mla_d_q,\
                     neon_fp_mla_d_scalar_q")
      (const_string "neon_fp_mla")

    (eq_attr "type" "neon_fp_recpe_s, neon_fp_recpe_s_q,\
                     neon_fp_rsqrte_s, neon_fp_rsqrte_s_q,\
                     neon_fp_recpe_d, neon_fp_recpe_d_q,\
                     neon_fp_rsqrte_d, neon_fp_rsqrte_d_q")
      (const_string "neon_fp_estimate")

    (eq_attr "type" "neon_fp_recpx_s, neon_fp_recpx_s_q,\
                     neon_fp_recpx_d, neon_fp_recpx_d_q")
      (const_string "neon_fp_estimatex")

    (eq_attr "type" "neon_fp_recps_s, neon_fp_recps_s_q,\
                     neon_fp_rsqrts_s, neon_fp_rsqrts_s_q,\
                     neon_fp_recps_d, neon_fp_recps_d_q,\
                     neon_fp_rsqrts_d, neon_fp_rsqrts_d_q")
      (const_string "neon_fp_step")

    ;; Miscellaneous: bit manipulation, permutes, moves and transfers.
    ;; Note that the neon_move types are classed as neon_bitops.
    (eq_attr "type" "neon_rbit, neon_rbit_q,\
                     neon_cls, neon_cls_q, neon_cnt, neon_cnt_q,\
                     neon_dup, neon_dup_q,\
                     neon_rev, neon_rev_q,\
                     neon_move, neon_move_q,\
                     neon_ext, neon_permute, neon_zip")
      (const_string "neon_bitops")

    (eq_attr "type" "neon_ext_q, neon_permute_q, neon_zip_q")
      (const_string "neon_bitops_q")

    (eq_attr "type" "neon_bsl, neon_bsl_q")
      (const_string "neon_bitins")

    (eq_attr "type" "neon_tbl1, neon_tbl2, neon_tbl3, neon_tbl4")
      (const_string "neon_tbl")

    (eq_attr "type" "neon_from_gp, neon_from_gp_q, f_mcr, f_mcrr")
      (const_string "neon_from_gp")

    (eq_attr "type" "neon_to_gp, neon_to_gp_q, f_mrc, f_mrrc")
      (const_string "neon_to_gp")

    ;; Loads.
    (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q")
      (const_string "neon_load1_1")

    (eq_attr "type" "neon_load1_2reg, neon_load1_2reg_q")
      (const_string "neon_load1_2")

    (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q")
      (const_string "neon_load1_3")

    (eq_attr "type" "neon_load1_4reg, neon_load1_4reg_q")
      (const_string "neon_load1_4")

    (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q")
      (const_string "neon_load1_one")

    (eq_attr "type" "neon_load1_all_lanes, neon_load1_all_lanes_q")
      (const_string "neon_load1_all")

    (eq_attr "type" "neon_load2_2reg, neon_load2_2reg_q,\
                     neon_load2_4reg, neon_load2_4reg_q")
      (const_string "neon_load2_2")

    (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q")
      (const_string "neon_load2_one")

    (eq_attr "type" "neon_load2_all_lanes, neon_load2_all_lanes_q")
      (const_string "neon_load2_all")

    (eq_attr "type" "neon_load3_3reg, neon_load3_3reg_q")
      (const_string "neon_load3_3")

    (eq_attr "type" "neon_load3_one_lane, neon_load3_one_lane_q")
      (const_string "neon_load3_one")

    (eq_attr "type" "neon_load3_all_lanes, neon_load3_all_lanes_q")
      (const_string "neon_load3_all")

    (eq_attr "type" "neon_load4_4reg, neon_load4_4reg_q")
      (const_string "neon_load4_4")

    (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
      (const_string "neon_load4_one")

    (eq_attr "type" "neon_load4_all_lanes, neon_load4_all_lanes_q")
      (const_string "neon_load4_all")

    ;; Stores.
    (eq_attr "type" "f_stores, f_stored,\
                     neon_stp, neon_stp_q")
      (const_string "neon_store")

    (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q")
      (const_string "neon_store1_1")

    (eq_attr "type" "neon_store1_2reg, neon_store1_2reg_q")
      (const_string "neon_store1_2")

    (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q")
      (const_string "neon_store1_3")

    (eq_attr "type" "neon_store1_4reg, neon_store1_4reg_q")
      (const_string "neon_store1_4")

    (eq_attr "type" "neon_store1_one_lane, neon_store1_one_lane_q")
      (const_string "neon_store1_one")

    (eq_attr "type" "neon_store2_2reg, neon_store2_2reg_q,\
                     neon_store2_4reg, neon_store2_4reg_q")
      (const_string "neon_store2_2")

    (eq_attr "type" "neon_store2_one_lane, neon_store2_one_lane_q")
      (const_string "neon_store2_one")

    (eq_attr "type" "neon_store3_3reg, neon_store3_3reg_q")
      (const_string "neon_store3_3")

    (eq_attr "type" "neon_store3_one_lane, neon_store3_one_lane_q")
      (const_string "neon_store3_one")

    (eq_attr "type" "neon_store4_4reg, neon_store4_4reg_q")
      (const_string "neon_store4_4")

    (eq_attr "type" "neon_store4_one_lane, neon_store4_one_lane_q")
      (const_string "neon_store4_one")]

    (const_string "unknown")))
290 | ||
;; The Exynos M1 core is modeled as a triple issue pipeline that has
;; the following functional units.
;;
;; The units are split over three automata: one for the integer and
;; branch units (gp), one for the load/store units (ls) and one for the
;; Neon/FP units (fp).

(define_automaton "exynos_m1_gp")
(define_automaton "exynos_m1_ls")
(define_automaton "exynos_m1_fp")
297 | ||
;; 1. Two pipelines for simple integer operations: A, B
;; 2. One pipeline for simple or complex integer operations: C

(define_cpu_unit "em1_xa, em1_xb, em1_xc" "exynos_m1_gp")

;; em1_alu is satisfied by any one of the three integer pipelines;
;; em1_c requires the C pipeline specifically.
(define_reservation "em1_alu" "(em1_xa | em1_xb | em1_xc)")
(define_reservation "em1_c" "em1_xc")
305 | ||
;; 3. Two asymmetric pipelines for Neon and FP operations: F0, F1

(define_cpu_unit "em1_f0, em1_f1" "exynos_m1_fp")

;; Named aliases for the operations each pipeline accepts.
;; Only em1_nalu can be satisfied by either pipeline.

;; Reservations bound to F0.
(define_reservation "em1_fmac" "em1_f0")
(define_reservation "em1_fcvt" "em1_f0")
;; Either F0 or F1.
(define_reservation "em1_nalu" "(em1_f0 | em1_f1)")
(define_reservation "em1_nalu0" "em1_f0")
(define_reservation "em1_nalu1" "em1_f1")
(define_reservation "em1_nmisc" "em1_f0")
(define_reservation "em1_ncrypt" "em1_f0")
;; Reservations bound to F1.
(define_reservation "em1_fadd" "em1_f1")
(define_reservation "em1_fvar" "em1_f1")
(define_reservation "em1_fst" "em1_f1")
320 | ||
;; 4. One pipeline for branch operations: BX
;;    The unit belongs to the same automaton as the integer pipelines.

(define_cpu_unit "em1_bx" "exynos_m1_gp")

(define_reservation "em1_br" "em1_bx")
326 | ||
;; 5. One AGU for loads: L
;;    One AGU for stores and one pipeline for stores: S, SD

(define_cpu_unit "em1_lx" "exynos_m1_ls")
(define_cpu_unit "em1_sx, em1_sd" "exynos_m1_ls")

;; A load takes the L unit; a store takes S and SD in the same cycle.
(define_reservation "em1_ld" "em1_lx")
(define_reservation "em1_st" "(em1_sx + em1_sd)")

;; Common occurrences
;; Both combine the F1 store reservation (em1_fst) with a load/store unit
;; in the same cycle: em1_sfst with the store units, em1_lfst with the
;; load unit.
(define_reservation "em1_sfst" "(em1_fst + em1_st)")
(define_reservation "em1_lfst" "(em1_fst + em1_ld)")
339 | ||
;; Branches
;;
;; No latency as there is no result
;; TODO: Unconditional branches use no units;
;;       conditional branches add the BX unit;
;;       indirect branches add the C unit.
;; For now every insn of type "branch" reserves the BX unit for one cycle.
(define_insn_reservation "exynos_m1_branch" 0
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "branch"))
  "em1_br")
350 | ||
;; Calls: latency 1, issued on any one of the integer pipelines.
(define_insn_reservation "exynos_m1_call" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "call"))
  "em1_alu")
355 | ||
;; Basic ALU
;;
;; Simple ALU without shift, non-predicated
;; Latency 1, issued on any one of the integer pipelines.
(define_insn_reservation "exynos_m1_alu" 1
  (and (eq_attr "tune" "exynosm1")
       (and (not (eq_attr "predicated" "yes"))
            (eq_attr "type" "alu_imm, alus_imm, logic_imm, logics_imm,\
                             alu_sreg, alus_sreg, logic_reg, logics_reg,\
                             adc_imm, adcs_imm, adc_reg, adcs_reg,\
                             adr, bfm, clz, rbit, rev, csel, alu_dsp_reg,\
                             shift_imm, shift_reg, rotate_imm, extend,\
                             mov_imm, mov_reg,\
                             mvn_imm, mvn_reg,\
                             mrs, multiple")))
  "em1_alu")
371 | ||
;; Simple ALU without shift, predicated
;; Latency 1, but restricted to the C pipeline.  The type list is the one
;; used by the non-predicated reservation minus csel.
(define_insn_reservation "exynos_m1_alu_p" 1
  (and (eq_attr "tune" "exynosm1")
       (and (eq_attr "predicated" "yes")
            (eq_attr "type" "alu_imm, alus_imm, logic_imm, logics_imm,\
                             alu_sreg, alus_sreg, logic_reg, logics_reg,\
                             adc_imm, adcs_imm, adc_reg, adcs_reg,\
                             adr, bfm, clz, rbit, rev, alu_dsp_reg,\
                             shift_imm, shift_reg, rotate_imm, extend,\
                             mov_imm, mov_reg,\
                             mvn_imm, mvn_reg,\
                             mrs, multiple")))
  "em1_c")
385 | ||
;; ALU ops with immediate shift
;; TODO: if the shift value is between 0 and 3, the latency is just 1 cycle;
;;       otherwise it takes 2 cycles and the unit is blocked;
;;       for now, assume the latter's latency and the former's units.
;; Hence latency 2 with a single-cycle reservation of any integer pipeline.
(define_insn_reservation "exynos_m1_alu_shift" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "alu_ext, alus_ext,\
                        alu_shift_imm, alus_shift_imm,\
                        logic_shift_imm, logics_shift_imm,\
                        mov_shift, mvn_shift"))
  "(em1_alu)")
397 | ||
;; ALU ops with register controlled shift, non-predicated
;; Latency 2; an integer pipeline is reserved on two consecutive cycles.
(define_insn_reservation "exynos_m1_alu_shift_reg" 2
  (and (eq_attr "tune" "exynosm1")
       (and (not (eq_attr "predicated" "yes"))
            (eq_attr "type" "alu_shift_reg, alus_shift_reg,\
                             logic_shift_reg, logics_shift_reg,\
                             mov_shift_reg, mvn_shift_reg")))
  "(em1_alu * 2)")
406 | ||
;; ALU ops with register controlled shift, predicated
;; Latency 2; any integer pipeline on the first cycle, then the C pipeline
;; on the second.
(define_insn_reservation "exynos_m1_alu_shift_reg_p" 2
  (and (eq_attr "tune" "exynosm1")
       (and (eq_attr "predicated" "yes")
            (eq_attr "type" "alu_shift_reg, alus_shift_reg,\
                             logic_shift_reg, logics_shift_reg,\
                             mov_shift_reg, mvn_shift_reg")))
  "(em1_alu, em1_c)")
415 | ||
;; Integer multiply
;; Selection is by the "mul32" and "mul64" attributes, which are defined
;; outside this file section (presumably 32-bit and 64-bit multiplies;
;; confirm against their definitions).

;; Latency 3 on the C pipeline.
(define_insn_reservation "exynos_m1_mla" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "mul32" "yes"))
  "em1_c")

;; Latency 4; any integer pipeline on the first cycle, then the C pipeline.
(define_insn_reservation "exynos_m1_mlal" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "mul64" "yes"))
  "em1_alu, em1_c")
426 | ||
;; Integer divide
;; TODO: assume the median latency; blocks other divisions
;; Modeled as latency 13 with a single-cycle reservation of the C pipeline,
;; so the blocking of later divisions is not represented here.
(define_insn_reservation "exynos_m1_div" 13
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "udiv, sdiv"))
  "em1_c")
433 | ||
;; Load-store execution Unit
;;
;; Loads of up to 2 words.
;; Latency 4; the load AGU is reserved for one cycle.
(define_insn_reservation "exynos_m1_load" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "load_byte, load1, load2"))
  "em1_ld")

;; Loads of 3 or 4 words.
;; Latency 6; the load AGU is reserved for three consecutive cycles.
(define_insn_reservation "exynos_m1_loadm" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "load3, load4"))
  "(em1_ld * 3)")
447 | ||
;; Stores of up to 2 words.
;; Latency 1; the store units are reserved for one cycle.
(define_insn_reservation "exynos_m1_store" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "store1, store2"))
  "em1_st")

;; Stores of 3 or 4 words.
;; Latency 3; the store units are reserved for three consecutive cycles.
(define_insn_reservation "exynos_m1_storem" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "store3, store4"))
  "(em1_st * 3)")
459 | ||
;; Advanced SIMD Unit
;;
;; Integer Arithmetic Instructions.
;; The reservations below select on the "exynos_m1_neon_type" class rather
;; than on the raw "type" attribute.

;; Latency 1 on F0 (em1_nmisc).  Unlike its siblings, this reservation's
;; name has no "neon_" component.
(define_insn_reservation "exynos_m1_arith_simple" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_arith_simple"))
  "em1_nmisc")

;; Latency 2 on either F0 or F1.
(define_insn_reservation "exynos_m1_neon_arith_basic" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_arith_basic"))
  "em1_nalu")

;; Latency 3 on F0 (em1_nmisc).
(define_insn_reservation "exynos_m1_neon_arith_complex" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_arith_complex"))
  "em1_nmisc")
478 | ||
;; Integer Multiply Instructions.

;; Multiplies, multiply-accumulates and saturating long
;; multiply-accumulates share one reservation: latency 4 on F0 (em1_nmisc).
(define_insn_reservation "exynos_m1_neon_multiply" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type"
                "neon_multiply, neon_mla, neon_sat_mla_long"))
  "em1_nmisc")
486 | ||
;; Integer Shift Instructions.

;; Shift-and-accumulate: latency 4 on F1.
(define_insn_reservation
  "exynos_m1_neon_shift_acc" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_shift_acc"))
  "em1_nalu1")

;; Basic shifts by immediate or register: latency 2 on either F0 or F1.
(define_insn_reservation
  "exynos_m1_neon_shift_basic" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type"
                "neon_shift_imm_basic, neon_shift_reg_basic"))
  "em1_nalu")

;; Complex (saturating) shifts by immediate or register: latency 4 on F1.
(define_insn_reservation
  "exynos_m1_neon_shift_complex" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type"
                "neon_shift_imm_complex, neon_shift_reg_complex"))
  "em1_nalu1")
508 | ||
;; Floating Point Instructions.

;; Negate and absolute value: latency 2 on either F0 or F1.
(define_insn_reservation
  "exynos_m1_neon_fp_unary" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_unary"))
  "em1_nalu")

;; Add and subtract: latency 4 on F1 (em1_fadd).
(define_insn_reservation
  "exynos_m1_neon_fp_add" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_add"))
  "em1_fadd")

;; Absolute difference: latency 3 on F0 (em1_nmisc).
(define_insn_reservation
  "exynos_m1_neon_fp_abd" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_abd"))
  "em1_nmisc")

;; Compares, which also covers the FP min/max types: latency 1 on F0.
(define_insn_reservation
  "exynos_m1_neon_fp_compare" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_compare"))
  "em1_nmisc")
534 | ||
;; TODO: the latency and throughput of reduce insns actually vary between
;;       3-5 and 1/4-1 respectively; the median values were picked.
;; Min/max reduction: latency 5, F0 (em1_nmisc) reserved for four
;; consecutive cycles.
(define_insn_reservation
  "exynos_m1_neon_fp_reduc" 5
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_reduc_minmax"))
  "(em1_nmisc * 4)")

;; Add reduction: latency 10; two cycles on either F0 or F1 followed by
;; one cycle on F1 (em1_fadd).
(define_insn_reservation
  "exynos_m1_neon_fp_reduc_add" 10
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_reduc_add"))
  "((em1_nalu * 2), em1_fadd)")
548 | ||
;; Rounding: latency 4 on F0 (em1_fcvt).
(define_insn_reservation
  "exynos_m1_neon_fp_round" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_round"))
  "em1_fcvt")

;; Conversions: latency 4 on F0 (em1_fcvt).
(define_insn_reservation
  "exynos_m1_neon_fp_cvt" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_cvt"))
  "em1_fcvt")
560 | ||
;; Multiply: latency 5 on F0 (em1_fmac).
(define_insn_reservation
  "exynos_m1_neon_fp_mul" 5
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_mul"))
  "em1_fmac")

;; Multiply-accumulate: latency 6 on F0 (em1_fmac).
(define_insn_reservation
  "exynos_m1_neon_fp_mla" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_mla"))
  "em1_fmac")

;; Reciprocal and reciprocal square-root estimates: latency 5 on F0
;; (em1_fcvt).
(define_insn_reservation
  "exynos_m1_neon_fp_estimate" 5
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_estimate"))
  "em1_fcvt")

;; The recpx types: latency 1 on F0 (em1_nmisc).
(define_insn_reservation
  "exynos_m1_neon_fp_estimatex" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_estimatex"))
  "em1_nmisc")

;; Reciprocal and reciprocal square-root steps: latency 6 on F0 (em1_fmac).
(define_insn_reservation
  "exynos_m1_neon_fp_step" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_step"))
  "em1_fmac")
590 | ||
;; Miscellaneous Instructions.

;; Latency 2 on either F0 or F1.
(define_insn_reservation
  "exynos_m1_neon_bitops" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_bitops"))
  "em1_nalu")

;; Latency 3; either F0 or F1 is reserved on each of two consecutive cycles.
(define_insn_reservation
  "exynos_m1_neon_bitops_q" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_bitops_q"))
  "(em1_nalu, em1_nalu)")

;; Bit select (the neon_bsl types): latency 2 on F1.
(define_insn_reservation
  "exynos_m1_neon_bitins" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_bitins"))
  "em1_nalu1")

;; TODO: it is more complicated than this.
;; All four table-lookup types are treated alike: latency 2 on F1.
(define_insn_reservation
  "exynos_m1_neon_tbl" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_tbl"))
  "em1_nalu1")
617 | ||
;; The neon_from_gp class: latency 4, reserving the store units (em1_st).
(define_insn_reservation
  "exynos_m1_neon_from_gp" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_from_gp"))
  "em1_st")

;; The neon_to_gp class: latency 9, reserving F1 and the load unit in the
;; same cycle (em1_lfst).
(define_insn_reservation
  "exynos_m1_neon_to_gp" 9
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_to_gp"))
  "em1_lfst")
629 | ||
;; Load Instructions.

;; These two reservations select on the raw "type" attribute, not on
;; "exynos_m1_neon_type".

;; Latency 5; the load AGU is reserved for one cycle.
(define_insn_reservation
  "exynos_m1_neon_load" 5
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "f_loads, f_loadd, neon_ldp"))
  "em1_ld")

;; Latency 6; the load AGU is reserved for two consecutive cycles.
(define_insn_reservation
  "exynos_m1_neon_load_q" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "neon_ldp_q"))
  "(em1_ld, em1_ld)")
643 | ||
;; The load1 classes.  The load AGU is held for as many cycles as the
;; repeat count in each reservation string.

;; One register, or all lanes: latency 6, one load-AGU cycle.
(define_insn_reservation
  "exynos_m1_neon_load1_1" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load1_1, neon_load1_all"))
  "em1_ld")

;; Two registers: latency 6, two load-AGU cycles.
(define_insn_reservation
  "exynos_m1_neon_load1_2" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load1_2"))
  "(em1_ld * 2)")

;; Three registers: latency 7, three load-AGU cycles.
(define_insn_reservation
  "exynos_m1_neon_load1_3" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load1_3"))
  "(em1_ld * 3)")

;; Four registers: latency 8, four load-AGU cycles.
(define_insn_reservation
  "exynos_m1_neon_load1_4" 8
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load1_4"))
  "(em1_ld * 4)")

;; One lane: latency 7; two load-AGU cycles followed by one cycle on
;; either F0 or F1.
(define_insn_reservation
  "exynos_m1_neon_load1_one" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load1_one"))
  "((em1_ld * 2), em1_nalu)")
673 | ||
;; The load2 classes.

;; Multiple-register form (covers the 2reg and 4reg types): latency 10,
;; five load-AGU cycles.
(define_insn_reservation
  "exynos_m1_neon_load2_2" 10
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load2_2"))
  "(em1_ld * 5)")

;; One lane: latency 7; two load-AGU cycles, then two cycles on either
;; F0 or F1.
(define_insn_reservation
  "exynos_m1_neon_load2_one" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load2_one"))
  "((em1_ld * 2), (em1_nalu * 2))")

;; All lanes: latency 6, two load-AGU cycles.
(define_insn_reservation
  "exynos_m1_neon_load2_all" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load2_all"))
  "(em1_ld * 2)")
691 | ||
;; The load3 classes.

;; Three registers: latency 12, six load-AGU cycles.
(define_insn_reservation
  "exynos_m1_neon_load3_3" 12
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load3_3"))
  "(em1_ld * 6)")

;; One lane: latency 9; four load-AGU cycles, then three cycles on either
;; F0 or F1.
(define_insn_reservation
  "exynos_m1_neon_load3_one" 9
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load3_one"))
  "((em1_ld * 4), (em1_nalu * 3))")

;; All lanes: latency 7, three load-AGU cycles.
(define_insn_reservation
  "exynos_m1_neon_load3_all" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load3_all"))
  "(em1_ld * 3)")
709 | ||
;; The load4 classes.

;; Four registers: latency 14, seven load-AGU cycles.
(define_insn_reservation
  "exynos_m1_neon_load4_4" 14
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load4_4"))
  "(em1_ld * 7)")

;; One lane: latency 9; four load-AGU cycles, then four cycles on either
;; F0 or F1.
(define_insn_reservation
  "exynos_m1_neon_load4_one" 9
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load4_one"))
  "((em1_ld * 4), (em1_nalu * 4))")

;; All lanes: latency 8, four load-AGU cycles.
(define_insn_reservation
  "exynos_m1_neon_load4_all" 8
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load4_all"))
  "(em1_ld * 4)")
727 | ||
;; Store Instructions.

;; The neon_store class (f_stores, f_stored and the neon_stp types):
;; latency 1; F1 (em1_fst) on the first cycle, then the store units on
;; the next.
(define_insn_reservation
  "exynos_m1_neon_store" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store"))
  "(em1_fst, em1_st)")

;; The store1 register forms reserve F1 and the store units together
;; (em1_sfst), once per register; the latency equals the register count.
(define_insn_reservation
  "exynos_m1_neon_store1_1" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store1_1"))
  "em1_sfst")

(define_insn_reservation
  "exynos_m1_neon_store1_2" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store1_2"))
  "(em1_sfst * 2)")

(define_insn_reservation
  "exynos_m1_neon_store1_3" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store1_3"))
  "(em1_sfst * 3)")

(define_insn_reservation
  "exynos_m1_neon_store1_4" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store1_4"))
  "(em1_sfst * 4)")

;; One lane: latency 7; F1 (em1_fst) on the first cycle, then the store
;; units on the next.
(define_insn_reservation
  "exynos_m1_neon_store1_one" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store1_one"))
  "(em1_fst, em1_st)")
765 | ||
;; The store2/3/4 classes.  Each reservation covers both the
;; multiple-register form and the one-lane form.

;; Latency 7; F1 plus the store units on the first cycle, then F1 alone.
(define_insn_reservation
  "exynos_m1_neon_store2" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store2_2, neon_store2_one"))
  "em1_sfst, em1_fst")

;; Latency 16; three cycles of F1 plus the store units, two cycles of F1
;; alone, then one cycle on either F0 or F1.
(define_insn_reservation
  "exynos_m1_neon_store3" 16
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store3_3, neon_store3_one"))
  "((em1_sfst * 3), (em1_fst * 2), em1_nalu)")

;; Latency 17; four cycles of F1 plus the store units, two cycles of F1
;; alone, then one cycle on either F0 or F1.
(define_insn_reservation
  "exynos_m1_neon_store4" 17
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store4_4, neon_store4_one"))
  "((em1_sfst * 4), (em1_fst * 2), em1_nalu)")
783 | ||
;; Floating-Point Operations.
;; These reservations select on the raw "type" attribute.

;; FP constants: latency 2 on either F0 or F1.
(define_insn_reservation "exynos_m1_fp_const" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fconsts, fconstd"))
  "em1_nalu")

;; Add: latency 4 on F1 (em1_fadd).
(define_insn_reservation "exynos_m1_fp_add" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fadds, faddd"))
  "em1_fadd")

;; Multiply: latency 5 on F0 (em1_fmac).
(define_insn_reservation "exynos_m1_fp_mul" 5
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fmuls, fmuld"))
  "em1_fmac")

;; Multiply-accumulate, including the fused types: latency 6 on F0
;; (em1_fmac).
(define_insn_reservation "exynos_m1_fp_mac" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fmacs, ffmas, fmacd, ffmad"))
  "em1_fmac")

;; The f_cvt and f_rint types: latency 4 on F0 (em1_fcvt).
(define_insn_reservation "exynos_m1_fp_cvt" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "f_cvt, f_rints, f_rintd"))
  "em1_fcvt")
810 | ||
;; The f_cvtf2i type: latency 13; F0 (em1_fcvt) on the first cycle, then
;; F1 together with the load unit (em1_lfst) on the next.
(define_insn_reservation "exynos_m1_fp_cvt_i" 13
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "f_cvtf2i"))
  "(em1_fcvt, em1_lfst)")

;; The f_cvti2f type: latency 9; the store units on the first cycle, then
;; F0 (em1_fcvt) on the next.
(define_insn_reservation "exynos_m1_i_cvt_fp" 9
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "f_cvti2f"))
  "(em1_st, em1_fcvt)")
820 | ||
;; Insns of type fcmps or fcmpd: default latency 4 cycles, reserving
;; em1_nmisc for one cycle.
821 | (define_insn_reservation "exynos_m1_fp_cmp" 4 | |
822 | (and (eq_attr "tune" "exynosm1") | |
823 | (eq_attr "type" "fcmps, fcmpd")) | |
824 | "em1_nmisc") | |
825 | ||
;; Insns of type fcsel: default latency 4 cycles.  The "+" means em1_st and
;; em1_nalu0 are both reserved in the same cycle.
826 | (define_insn_reservation "exynos_m1_fp_sel" 4 | |
827 | (and (eq_attr "tune" "exynosm1") | |
828 | (eq_attr "type" "fcsel")) | |
829 | "(em1_st + em1_nalu0)") | |
830 | ||
;; Insns of type ffariths or ffarithd: default latency 2 cycles, reserving
;; em1_nalu for one cycle.
831 | (define_insn_reservation "exynos_m1_fp_arith" 2 | |
832 | (and (eq_attr "tune" "exynosm1") | |
833 | (eq_attr "type" "ffariths, ffarithd")) | |
834 | "em1_nalu") | |
835 | ||
;; Insns of type fmov: default latency 2 cycles, reserving em1_nalu for one
;; cycle.
836 | (define_insn_reservation "exynos_m1_fp_cpy" 2 | |
837 | (and (eq_attr "tune" "exynosm1") | |
838 | (eq_attr "type" "fmov")) | |
839 | "em1_nalu") | |
840 | ||
;; Single-precision divide and square root, both the scalar types (fdivs,
;; fsqrts) and the NEON types (neon_fp_div_s[_q], neon_fp_sqrt_s[_q]), share
;; this reservation.  Default latency is 15 cycles; em1_fvar is held for 9
;; consecutive cycles, so a dependent-free second insn of this class cannot
;; start on em1_fvar until those cycles have elapsed.
841 | (define_insn_reservation "exynos_m1_fp_divs" 15 | |
842 | (and (eq_attr "tune" "exynosm1") | |
843 | (eq_attr "type" "fdivs, neon_fp_div_s, neon_fp_div_s_q,\ | |
844 | fsqrts, neon_fp_sqrt_s, neon_fp_sqrt_s_q")) | |
845 | "(em1_fvar * 9)") | |
846 | ||
;; Double-precision divide and square root, both the scalar types (fdivd,
;; fsqrtd) and the NEON types (neon_fp_div_d[_q], neon_fp_sqrt_d[_q]), share
;; this reservation.  Default latency is 22 cycles; em1_fvar is held for 9
;; consecutive cycles.
;; NOTE(review): the occupancy (9 cycles) is the same as for the
;; single-precision reservation even though the latency is higher; confirm
;; this is intended.
847 | (define_insn_reservation "exynos_m1_fp_divd" 22 | |
848 | (and (eq_attr "tune" "exynosm1") | |
849 | (eq_attr "type" "fdivd, neon_fp_div_d, neon_fp_div_d_q,\ | |
850 | fsqrtd, neon_fp_sqrt_d, neon_fp_sqrt_d_q")) | |
851 | "(em1_fvar * 9)") | |
852 | ||
;; Insns of type f_minmaxs or f_minmaxd: default latency 2 cycles, holding
;; em1_nmisc for 2 consecutive cycles.
853 | (define_insn_reservation "exynos_m1_fp_minmax" 2 | |
854 | (and (eq_attr "tune" "exynosm1") | |
855 | (eq_attr "type" "f_minmaxs, f_minmaxd")) | |
856 | "(em1_nmisc * 2)") | |
857 | ||
858 | ;; Crypto Operations. | |
859 | ||
;; Insns of type crypto_aese, crypto_aesmc, crypto_sha1_xor,
;; crypto_sha1_fast or crypto_sha256_fast: default latency 2 cycles,
;; reserving em1_ncrypt for one cycle.
860 | (define_insn_reservation "exynos_m1_crypto_simple" 2 | |
861 | (and (eq_attr "tune" "exynosm1") | |
862 | (eq_attr "type" "crypto_aese, crypto_aesmc,\ | |
863 | crypto_sha1_xor, crypto_sha1_fast, crypto_sha256_fast")) | |
864 | "em1_ncrypt") | |
865 | ||
;; Insns of type crypto_sha1_slow or crypto_sha256_slow: default latency 6
;; cycles, reserving em1_ncrypt for one cycle.
866 | (define_insn_reservation "exynos_m1_crypto_complex" 6 | |
867 | (and (eq_attr "tune" "exynosm1") | |
868 | (eq_attr "type" "crypto_sha1_slow, crypto_sha256_slow")) | |
869 | "em1_ncrypt") | |
870 | ||
;; Insns of type neon_mul_b_long, neon_mul_h_long or neon_mul_s_long:
;; default latency 2 cycles, reserving em1_ncrypt for one cycle.
;; NOTE(review): the reservation name says "poly", but the condition is
;; keyed on the generic neon_mul_*_long types; confirm that only the intended
;; insns reach this reservation.
871 | (define_insn_reservation "exynos_m1_crypto_poly" 2 | |
872 | (and (eq_attr "tune" "exynosm1") | |
873 | (eq_attr "type" "neon_mul_b_long, neon_mul_h_long, neon_mul_s_long")) | |
874 | "em1_ncrypt") | |
875 | ||
;; Insns of type neon_mul_d_long: default latency 4 cycles, reserving
;; em1_ncrypt for one cycle.
876 | (define_insn_reservation "exynos_m1_crypto_polyl" 4 | |
877 | (and (eq_attr "tune" "exynosm1") | |
878 | (eq_attr "type" "neon_mul_d_long")) | |
879 | "em1_ncrypt") | |
880 | ||
;; Insns of type crc: default latency 2 cycles, reserving em1_c for one
;; cycle.
881 | (define_insn_reservation "exynos_m1_crc" 2 | |
882 | (and (eq_attr "tune" "exynosm1") | |
883 | (eq_attr "type" "crc")) | |
884 | "em1_c") | |
885 | ||
886 | ;; Simple execution unit bypasses | |
887 | ||
888 | ;; Pre-decrement and post-increment addressing modes update the register quickly. | |
889 | ;; TODO: figure out how to tell the addressing mode register from the loaded one. | |
;; Both strings are glob patterns: every reservation whose name starts with
;; "exynos_m1_store" is a producer and a consumer here, and the result is
;; treated as ready after 1 cycle instead of the producer's default latency.
890 | (define_bypass 1 "exynos_m1_store*" "exynos_m1_store*") | |
891 | ||
892 | ;; MLAs can feed other MLAs quickly. | |
;; Glob patterns again: any reservation named "exynos_m1_mla*" feeding
;; another such reservation sees a 1-cycle latency.
893 | (define_bypass 1 "exynos_m1_mla*" "exynos_m1_mla*") | |
894 | ||
895 | ;; Insns in FMAC or FADD can feed other such insns quickly. | |
;; Scalar multiply result forwarded to add/mul/mac consumers: 4 cycles
;; instead of exynos_m1_fp_mul's default latency of 5.
896 | (define_bypass 4 "exynos_m1_fp_mul" | |
897 | "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac") | |
;; Scalar multiply-accumulate result forwarded to the same consumers: 5
;; cycles instead of exynos_m1_fp_mac's default latency of 6.
898 | (define_bypass 5 "exynos_m1_fp_mac" | |
899 | "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac") | |
;; NEON counterparts.  The producer and consumer reservations named here are
;; defined outside this excerpt, so their default latencies are not visible.
900 | (define_bypass 4 "exynos_m1_neon_fp_mul" | |
901 | "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\ | |
902 | exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step") | |
903 | (define_bypass 5 "exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step" | |
904 | "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\ | |
905 | exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step") | |
906 | ||
;; Scalar add result forwarded to add/mul/mac consumers: 3 cycles instead of
;; exynos_m1_fp_add's default latency of 4.
907 | (define_bypass 3 "exynos_m1_fp_add" | |
908 | "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac") | |
;; NEON counterpart; the reservations named here are defined outside this
;; excerpt.
909 | (define_bypass 3 "exynos_m1_neon_fp_add" | |
910 | "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\ | |
911 | exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step") | |
912 | ||
913 | ;; Insns in NALU can feed other such insns quickly. | |
;; Constant, arith and copy results (default latency 2 each) reach
;; const/arith/copy/select consumers after 1 cycle.
914 | (define_bypass 1 "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy" | |
915 | "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy,\ | |
916 | exynos_m1_fp_sel") | |
;; Select results (default latency 4) reach the same consumers after 3
;; cycles.
917 | (define_bypass 3 "exynos_m1_fp_sel" | |
918 | "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy,\ | |
919 | exynos_m1_fp_sel") | |
;; NEON producers listed in the first string feed the consumers in the
;; second string with a 1-cycle latency.  "exynos_m1_neon_bitops*" is a glob
;; on the consumer side only; the producer side names exynos_m1_neon_bitops
;; exactly.  All reservations named here are defined outside this excerpt.
920 | (define_bypass 1 "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\ | |
921 | exynos_m1_neon_bitops, exynos_m1_neon_bitins,\ | |
922 | exynos_m1_neon_tbl" | |
923 | "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\ | |
924 | exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex,\ | |
925 | exynos_m1_neon_bitops*, exynos_m1_neon_bitins,\ | |
926 | exynos_m1_neon_tbl") | |
;; Shift-accumulate and complex-shift producers feed the same consumer list
;; with a 3-cycle latency.
927 | (define_bypass 3 "exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex" | |
928 | "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\ | |
929 | exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex,\ | |
930 | exynos_m1_neon_bitops*, exynos_m1_neon_bitins,\ | |
931 | exynos_m1_neon_tbl") | |
;; exynos_m1_neon_fp_unary feeding itself sees a 1-cycle latency.
932 | (define_bypass 1 "exynos_m1_neon_fp_unary" "exynos_m1_neon_fp_unary") | |
933 | ||
934 | ;; Insns in NCRYPT can feed other such insns quickly. | |
;; The consumer list is the same in all three bypasses below; the glob
;; "exynos_m1_crypto_poly*" covers both exynos_m1_crypto_poly and
;; exynos_m1_crypto_polyl.
;; Simple and poly producers (default latency 2): 1 cycle.
935 | (define_bypass 1 "exynos_m1_crypto_simple, exynos_m1_crypto_poly" | |
936 | "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\ | |
937 | exynos_m1_crypto_poly*") | |
;; polyl producer (default latency 4): 3 cycles.
938 | (define_bypass 3 "exynos_m1_crypto_polyl" | |
939 | "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\ | |
940 | exynos_m1_crypto_poly*") | |
;; Complex producer (default latency 6): 5 cycles.
941 | (define_bypass 5 "exynos_m1_crypto_complex" | |
942 | "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\ | |
943 | exynos_m1_crypto_poly*") | |
944 | ||
945 | ;; Predicted branches take no time, but mispredicted ones take forever anyway. | |
;; The producer glob "exynos_m1_*" matches every reservation with that
;; prefix; whatever feeds exynos_m1_call or exynos_m1_branch (both defined
;; outside this excerpt) is treated as ready after 1 cycle.
946 | (define_bypass 1 "exynos_m1_*" | |
947 | "exynos_m1_call, exynos_m1_branch") |