]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/arm/exynos-m1.md
Update copyright years.
[thirdparty/gcc.git] / gcc / config / arm / exynos-m1.md
1 ;; Samsung Exynos M1 pipeline description
2 ;; Copyright (C) 2014-2016 Free Software Foundation, Inc.
3 ;;
4 ;; This file is part of GCC.
5 ;;
6 ;; GCC is free software; you can redistribute it and/or modify it
7 ;; under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
9 ;; any later version.
10 ;;
11 ;; GCC is distributed in the hope that it will be useful, but
12 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 ;; General Public License for more details.
15 ;;
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
19
;; Coarse classification of Advanced SIMD / VFP insns used by the
;; Exynos M1 reservations.  Each arm of the COND below folds a group of
;; generic "type" attribute values into one "exynos_m1_neon_type" value;
;; an insn whose "type" matches no arm gets "unknown".
;; Within this definition the declared values neon_mla_q, neon_mla_long,
;; neon_shift_reg_basic_q, neon_shift_reg_complex_q, neon_fp_minmax,
;; neon_fp_mul_q, neon_fp_mla_q and neon_move are never produced by any
;; arm: the "_q" types share the arm of their non-"_q" counterparts, the
;; neon_fp_minmax_* types map to neon_fp_compare, and neon_move /
;; neon_move_q map to neon_bitops.
20 (define_attr "exynos_m1_neon_type"
21 "neon_arith_simple, neon_arith_basic, neon_arith_complex,
22 neon_multiply, neon_mla, neon_mla_q, neon_mla_long, neon_sat_mla_long,
23 neon_shift_acc, neon_shift_imm_basic, neon_shift_imm_complex,
24 neon_shift_reg_basic, neon_shift_reg_basic_q,
25 neon_shift_reg_complex, neon_shift_reg_complex_q,
26 neon_fp_unary, neon_fp_add, neon_fp_abd, neon_fp_compare,
27 neon_fp_reduc_minmax, neon_fp_reduc_add, neon_fp_round, neon_fp_cvt,
28 neon_fp_minmax, neon_fp_mul, neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q,
29 neon_fp_estimate, neon_fp_estimatex, neon_fp_step,
30 neon_bitops, neon_bitops_q, neon_bitins,
31 neon_to_gp, neon_from_gp, neon_move, neon_tbl,
32 neon_load1_1, neon_load1_2, neon_load1_3, neon_load1_4,
33 neon_load1_one, neon_load1_all,
34 neon_load2_2, neon_load2_one, neon_load2_all,
35 neon_load3_3, neon_load3_one, neon_load3_all,
36 neon_load4_4, neon_load4_one, neon_load4_all,
37 neon_store,
38 neon_store1_1, neon_store1_2, neon_store1_3, neon_store1_4, neon_store1_one,
39 neon_store2_2, neon_store2_one,
40 neon_store3_3, neon_store3_one,
41 neon_store4_4, neon_store4_one,
42 unknown"
43 (cond [
44 (eq_attr "type" "neon_abd, neon_abd_q, neon_abd_long,\
45 neon_abs, neon_abs_q,\
46 neon_minmax, neon_minmax_q")
47 (const_string "neon_arith_simple")
48
49 (eq_attr "type" "neon_add, neon_add_q, neon_add_long,\
50 neon_neg, neon_neg_q,\
51 neon_sub, neon_sub_q, neon_sub_long, neon_sub_widen,\
52 neon_logic, neon_logic_q, neon_tst, neon_tst_q,\
53 neon_compare_zero, neon_compare_zero_q")
54 (const_string "neon_arith_basic")
55
56 (eq_attr "type" "neon_add_widen, neon_arith_acc, neon_arith_acc_q,\
57 neon_reduc_add, neon_reduc_add_q,\
58 neon_reduc_add_acc, neon_reduc_add_acc_q,\
59 neon_reduc_add_long, neon_add_halve_narrow_q,\
60 neon_add_halve, neon_add_halve_q,\
61 neon_sub_halve, neon_sub_halve_q, neon_qabs,\
62 neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
63 neon_qneg_q, neon_qsub, neon_qsub_q,\
64 neon_sub_halve_narrow_q,\
65 neon_compare, neon_compare_q,\
66 neon_reduc_minmax, neon_reduc_minmax_q")
67 (const_string "neon_arith_complex")
68
69 (eq_attr "type" "neon_mul_b, neon_mul_b_q, neon_mul_h, neon_mul_h_q,\
70 neon_mul_s, neon_mul_s_q,\
71 neon_mul_h_scalar, neon_mul_h_scalar_q,\
72 neon_mul_s_scalar, neon_mul_s_scalar_q,\
73 neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
74 neon_sat_mul_b, neon_sat_mul_b_q,\
75 neon_sat_mul_h, neon_sat_mul_h_q,\
76 neon_sat_mul_s, neon_sat_mul_s_q,\
77 neon_sat_mul_h_scalar, neon_sat_mul_h_scalar_q,\
78 neon_sat_mul_s_scalar, neon_sat_mul_s_scalar_q,\
79 neon_sat_mul_b_long, neon_sat_mul_h_long,\
80 neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
81 neon_sat_mul_s_scalar_long")
82 (const_string "neon_multiply")
83
84 (eq_attr "type" "neon_mla_b, neon_mla_h, neon_mla_s,\
85 neon_mla_h_scalar, neon_mla_s_scalar,\
86 neon_mla_b_long, neon_mla_h_long,\
87 neon_mla_s_long,\
88 neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
89 neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
90 neon_mla_h_scalar_q, neon_mla_s_scalar_q")
91 (const_string "neon_mla")
92
93 (eq_attr "type" "neon_sat_mla_b_long, neon_sat_mla_h_long,\
94 neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
95 neon_sat_mla_s_scalar_long")
96 (const_string "neon_sat_mla_long")
97
98 (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
99 (const_string "neon_shift_acc")
100
101 (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
102 neon_shift_imm_narrow_q, neon_shift_imm_long")
103 (const_string "neon_shift_imm_basic")
104
105 (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
106 neon_sat_shift_imm_narrow_q")
107 (const_string "neon_shift_imm_complex")
108
109 (eq_attr "type" "neon_shift_reg, neon_shift_reg_q")
110 (const_string "neon_shift_reg_basic")
111
112 (eq_attr "type" "neon_sat_shift_reg, neon_sat_shift_reg_q")
113 (const_string "neon_shift_reg_complex")
114
115 (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
116 neon_fp_abs_s, neon_fp_abs_s_q,\
117 neon_fp_neg_d, neon_fp_neg_d_q,\
118 neon_fp_abs_d, neon_fp_abs_d_q")
119 (const_string "neon_fp_unary")
120
121 (eq_attr "type" "neon_fp_addsub_s, neon_fp_addsub_s_q,\
122 neon_fp_addsub_d, neon_fp_addsub_d_q")
123 (const_string "neon_fp_add")
124
125 (eq_attr "type" "neon_fp_abd_s, neon_fp_abd_s_q,\
126 neon_fp_abd_d, neon_fp_abd_d_q")
127 (const_string "neon_fp_abd")
128
129 (eq_attr "type" "neon_fp_compare_s, neon_fp_compare_s_q,\
130 neon_fp_compare_d, neon_fp_compare_d_q,\
131 neon_fp_minmax_s, neon_fp_minmax_s_q,\
132 neon_fp_minmax_d, neon_fp_minmax_d_q")
133 (const_string "neon_fp_compare")
134
135 (eq_attr "type" "neon_fp_reduc_minmax_s, neon_fp_reduc_minmax_s_q,\
136 neon_fp_reduc_minmax_d, neon_fp_reduc_minmax_d_q")
137 (const_string "neon_fp_reduc_minmax")
138
139 (eq_attr "type" "neon_fp_reduc_add_s, neon_fp_reduc_add_s_q,\
140 neon_fp_reduc_add_d, neon_fp_reduc_add_d_q")
141 (const_string "neon_fp_reduc_add")
142
143 (eq_attr "type" "neon_fp_round_s, neon_fp_round_s_q,\
144 neon_fp_round_d, neon_fp_round_d_q")
145 (const_string "neon_fp_round")
146
147 (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h,\
148 neon_fp_to_int_s, neon_fp_to_int_s_q,\
149 neon_fp_to_int_d_q, neon_fp_to_int_d,\
150 neon_int_to_fp_s, neon_int_to_fp_s_q,\
151 neon_int_to_fp_d, neon_int_to_fp_d_q")
152 (const_string "neon_fp_cvt")
153
154 (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_q,\
155 neon_fp_mul_s_scalar, neon_fp_mul_s_scalar_q,\
156 neon_fp_mul_d, neon_fp_mul_d_q,\
157 neon_fp_mul_d_scalar_q")
158 (const_string "neon_fp_mul")
159
160 (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_q,\
161 neon_fp_mla_s_scalar,neon_fp_mla_s_scalar_q,\
162 neon_fp_mla_d, neon_fp_mla_d_q,\
163 neon_fp_mla_d_scalar_q")
164 (const_string "neon_fp_mla")
165
166 (eq_attr "type" "neon_fp_recpe_s, neon_fp_recpe_s_q,\
167 neon_fp_rsqrte_s, neon_fp_rsqrte_s_q,\
168 neon_fp_recpe_d, neon_fp_recpe_d_q,\
169 neon_fp_rsqrte_d, neon_fp_rsqrte_d_q")
170 (const_string "neon_fp_estimate")
171
172 (eq_attr "type" "neon_fp_recpx_s, neon_fp_recpx_s_q,\
173 neon_fp_recpx_d, neon_fp_recpx_d_q")
174 (const_string "neon_fp_estimatex")
175
176 (eq_attr "type" "neon_fp_recps_s, neon_fp_recps_s_q,\
177 neon_fp_rsqrts_s, neon_fp_rsqrts_s_q,\
178 neon_fp_recps_d, neon_fp_recps_d_q,\
179 neon_fp_rsqrts_d, neon_fp_rsqrts_d_q")
180 (const_string "neon_fp_step")
181
182 (eq_attr "type" "neon_rbit, neon_rbit_q,\
183 neon_cls, neon_cls_q, neon_cnt, neon_cnt_q,\
184 neon_dup, neon_dup_q,\
185 neon_rev, neon_rev_q,\
186 neon_move, neon_move_q,\
187 neon_ext, neon_permute, neon_zip")
188 (const_string "neon_bitops")
189
190 (eq_attr "type" "neon_ext_q, neon_permute_q, neon_zip_q")
191 (const_string "neon_bitops_q")
192
193 (eq_attr "type" "neon_bsl, neon_bsl_q")
194 (const_string "neon_bitins")
195
196 (eq_attr "type" "neon_tbl1, neon_tbl2, neon_tbl3, neon_tbl4")
197 (const_string "neon_tbl")
198
199 (eq_attr "type" "neon_from_gp, neon_from_gp_q, f_mcr, f_mcrr")
200 (const_string "neon_from_gp")
201
202 (eq_attr "type" "neon_to_gp, neon_to_gp_q, f_mrc, f_mrrc")
203 (const_string "neon_to_gp")
204
205 (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q")
206 (const_string "neon_load1_1")
207
208 (eq_attr "type" "neon_load1_2reg, neon_load1_2reg_q")
209 (const_string "neon_load1_2")
210
211 (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q")
212 (const_string "neon_load1_3")
213
214 (eq_attr "type" "neon_load1_4reg, neon_load1_4reg_q")
215 (const_string "neon_load1_4")
216
217 (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q")
218 (const_string "neon_load1_one")
219
220 (eq_attr "type" "neon_load1_all_lanes, neon_load1_all_lanes_q")
221 (const_string "neon_load1_all")
222
223 (eq_attr "type" "neon_load2_2reg, neon_load2_2reg_q,\
224 neon_load2_4reg, neon_load2_4reg_q")
225 (const_string "neon_load2_2")
226
227 (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q")
228 (const_string "neon_load2_one")
229
230 (eq_attr "type" "neon_load2_all_lanes, neon_load2_all_lanes_q")
231 (const_string "neon_load2_all")
232
233 (eq_attr "type" "neon_load3_3reg, neon_load3_3reg_q")
234 (const_string "neon_load3_3")
235
236 (eq_attr "type" "neon_load3_one_lane, neon_load3_one_lane_q")
237 (const_string "neon_load3_one")
238
239 (eq_attr "type" "neon_load3_all_lanes, neon_load3_all_lanes_q")
240 (const_string "neon_load3_all")
241
242 (eq_attr "type" "neon_load4_4reg, neon_load4_4reg_q")
243 (const_string "neon_load4_4")
244
245 (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
246 (const_string "neon_load4_one")
247
248 (eq_attr "type" "neon_load4_all_lanes, neon_load4_all_lanes_q")
249 (const_string "neon_load4_all")
250
251 (eq_attr "type" "f_stores, f_stored,\
252 neon_stp, neon_stp_q")
253 (const_string "neon_store")
254
255 (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q")
256 (const_string "neon_store1_1")
257
258 (eq_attr "type" "neon_store1_2reg, neon_store1_2reg_q")
259 (const_string "neon_store1_2")
260
261 (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q")
262 (const_string "neon_store1_3")
263
264 (eq_attr "type" "neon_store1_4reg, neon_store1_4reg_q")
265 (const_string "neon_store1_4")
266
267 (eq_attr "type" "neon_store1_one_lane, neon_store1_one_lane_q")
268 (const_string "neon_store1_one")
269
270 (eq_attr "type" "neon_store2_2reg, neon_store2_2reg_q,\
271 neon_store2_4reg, neon_store2_4reg_q")
272 (const_string "neon_store2_2")
273
274 (eq_attr "type" "neon_store2_one_lane, neon_store2_one_lane_q")
275 (const_string "neon_store2_one")
276
277 (eq_attr "type" "neon_store3_3reg, neon_store3_3reg_q")
278 (const_string "neon_store3_3")
279
280 (eq_attr "type" "neon_store3_one_lane, neon_store3_one_lane_q")
281 (const_string "neon_store3_one")
282
283 (eq_attr "type" "neon_store4_4reg, neon_store4_4reg_q")
284 (const_string "neon_store4_4")
285
286 (eq_attr "type" "neon_store4_one_lane, neon_store4_one_lane_q")
287 (const_string "neon_store4_one")]
288
289 (const_string "unknown")))
290
291 ;; The Exynos M1 core is modeled as a triple issue pipeline that has
292 ;; the following functional units.
293
;; The units are spread over three automata: "exynos_m1_gp" (integer and
;; branch pipes), "exynos_m1_ls" (load/store units) and "exynos_m1_fp"
;; (Neon/FP pipes).  In the reservation strings below "|" picks any one
;; of the listed units and "+" reserves all listed units in the same cycle.
294 (define_automaton "exynos_m1_gp")
295 (define_automaton "exynos_m1_ls")
296 (define_automaton "exynos_m1_fp")
297
298 ;; 1. Two pipelines for simple integer operations: A, B
299 ;; 2. One pipeline for simple or complex integer operations: C
300
301 (define_cpu_unit "em1_xa, em1_xb, em1_xc" "exynos_m1_gp")
302
;; em1_alu: any one of pipes A, B or C.  em1_c: pipe C specifically.
303 (define_reservation "em1_alu" "(em1_xa | em1_xb | em1_xc)")
304 (define_reservation "em1_c" "em1_xc")
305
306 ;; 3. Two asymmetric pipelines for Neon and FP operations: F0, F1
307
308 (define_cpu_unit "em1_f0, em1_f1" "exynos_m1_fp")
309
;; The names below are role aliases onto the two pipes:
;; F0: em1_fmac, em1_fcvt, em1_nalu0, em1_nmisc, em1_ncrypt.
;; F1: em1_nalu1, em1_fadd, em1_fvar, em1_fst.
;; em1_nalu may use either pipe.
310 (define_reservation "em1_fmac" "em1_f0")
311 (define_reservation "em1_fcvt" "em1_f0")
312 (define_reservation "em1_nalu" "(em1_f0 | em1_f1)")
313 (define_reservation "em1_nalu0" "em1_f0")
314 (define_reservation "em1_nalu1" "em1_f1")
315 (define_reservation "em1_nmisc" "em1_f0")
316 (define_reservation "em1_ncrypt" "em1_f0")
317 (define_reservation "em1_fadd" "em1_f1")
318 (define_reservation "em1_fvar" "em1_f1")
319 (define_reservation "em1_fst" "em1_f1")
320
321 ;; 4. One pipeline for branch operations: BX
322
323 (define_cpu_unit "em1_bx" "exynos_m1_gp")
324
325 (define_reservation "em1_br" "em1_bx")
326
327 ;; 5. One AGU for loads: L
328 ;; One AGU for stores and one pipeline for stores: S, SD
329
330 (define_cpu_unit "em1_lx" "exynos_m1_ls")
331 (define_cpu_unit "em1_sx, em1_sd" "exynos_m1_ls")
332
;; A store (em1_st) needs both the store AGU and the store pipeline in
;; the same cycle; a load (em1_ld) needs only the load AGU.
333 (define_reservation "em1_ld" "em1_lx")
334 (define_reservation "em1_st" "(em1_sx + em1_sd)")
335
336 ;; Common occurrences
;; em1_sfst: em1_fst (F1) together with both store units.
;; em1_lfst: em1_fst (F1) together with the load AGU.
337 (define_reservation "em1_sfst" "(em1_fst + em1_st)")
338 (define_reservation "em1_lfst" "(em1_fst + em1_ld)")
339
340 ;; Branches
341 ;;
342 ;; No latency as there is no result
343 ;; TODO: Unconditional branches use no units;
344 ;; conditional branches add the BX unit;
345 ;; indirect branches add the C unit.
;; As written, every insn of type "branch" reserves BX for one cycle,
;; whatever kind of branch it is.
346 (define_insn_reservation "exynos_m1_branch" 0
347 (and (eq_attr "tune" "exynosm1")
348 (eq_attr "type" "branch"))
349 "em1_br")
350
;; Calls: latency 1, reserving any one integer pipe (not BX).
351 (define_insn_reservation "exynos_m1_call" 1
352 (and (eq_attr "tune" "exynosm1")
353 (eq_attr "type" "call"))
354 "em1_alu")
355
356 ;; Basic ALU
357 ;;
;; The two reservations below share the same "type" list, except that
;; "csel" appears only in the non-predicated one.  Both have latency 1;
;; they differ in the unit: non-predicated insns may use any integer
;; pipe (em1_alu), predicated insns are restricted to pipe C (em1_c).
358 ;; Simple ALU without shift, non-predicated
359 (define_insn_reservation "exynos_m1_alu" 1
360 (and (eq_attr "tune" "exynosm1")
361 (and (not (eq_attr "predicated" "yes"))
362 (eq_attr "type" "alu_imm, alus_imm, logic_imm, logics_imm,\
363 alu_sreg, alus_sreg, logic_reg, logics_reg,\
364 adc_imm, adcs_imm, adc_reg, adcs_reg,\
365 adr, bfm, clz, rbit, rev, csel, alu_dsp_reg,\
366 shift_imm, shift_reg, rotate_imm, extend,\
367 mov_imm, mov_reg,\
368 mvn_imm, mvn_reg,\
369 mrs, multiple")))
370 "em1_alu")
371
372 ;; Simple ALU without shift, predicated
373 (define_insn_reservation "exynos_m1_alu_p" 1
374 (and (eq_attr "tune" "exynosm1")
375 (and (eq_attr "predicated" "yes")
376 (eq_attr "type" "alu_imm, alus_imm, logic_imm, logics_imm,\
377 alu_sreg, alus_sreg, logic_reg, logics_reg,\
378 adc_imm, adcs_imm, adc_reg, adcs_reg,\
379 adr, bfm, clz, rbit, rev, alu_dsp_reg,\
380 shift_imm, shift_reg, rotate_imm, extend,\
381 mov_imm, mov_reg,\
382 mvn_imm, mvn_reg,\
383 mrs, multiple")))
384 "em1_c")
385
386 ;; ALU ops with immediate shift
387 ;; TODO: if the shift value is between 0 and 3, the latency is just 1 cycle;
388 ;; otherwise it takes 2 cycles and the unit is blocked;
389 ;; for now, assume the latter's latency and the former's units.
;; That is: latency 2, but an integer pipe is reserved for one cycle only.
;; Unlike the register-shift forms below, this reservation does not test
;; the "predicated" attribute.
390 (define_insn_reservation "exynos_m1_alu_shift" 2
391 (and (eq_attr "tune" "exynosm1")
392 (eq_attr "type" "alu_ext, alus_ext,\
393 alu_shift_imm, alus_shift_imm,\
394 logic_shift_imm, logics_shift_imm,\
395 mov_shift, mvn_shift"))
396 "(em1_alu)")
397
398 ;; ALU ops with register controlled shift, non-predicated
;; "(em1_alu * 2)" reserves an integer pipe in each of two consecutive cycles.
399 (define_insn_reservation "exynos_m1_alu_shift_reg" 2
400 (and (eq_attr "tune" "exynosm1")
401 (and (not (eq_attr "predicated" "yes"))
402 (eq_attr "type" "alu_shift_reg, alus_shift_reg,\
403 logic_shift_reg, logics_shift_reg,\
404 mov_shift_reg, mvn_shift_reg")))
405 "(em1_alu * 2)")
406
407 ;; ALU ops with register controlled shift, predicated
;; "(em1_alu, em1_c)" reserves any integer pipe in the first cycle and
;; pipe C in the following cycle.
408 (define_insn_reservation "exynos_m1_alu_shift_reg_p" 2
409 (and (eq_attr "tune" "exynosm1")
410 (and (eq_attr "predicated" "yes")
411 (eq_attr "type" "alu_shift_reg, alus_shift_reg,\
412 logic_shift_reg, logics_shift_reg,\
413 mov_shift_reg, mvn_shift_reg")))
414 "(em1_alu, em1_c)")
415
416 ;; Integer multiply
;; Selected through the "mul32" / "mul64" attributes (defined outside this
;; file) rather than through "type".  The 32-bit form uses pipe C only;
;; the 64-bit form uses any integer pipe and then pipe C in the next cycle.
417 (define_insn_reservation "exynos_m1_mla" 3
418 (and (eq_attr "tune" "exynosm1")
419 (eq_attr "mul32" "yes"))
420 "em1_c")
421
422 (define_insn_reservation "exynos_m1_mlal" 4
423 (and (eq_attr "tune" "exynosm1")
424 (eq_attr "mul64" "yes"))
425 "em1_alu, em1_c")
426
427 ;; Integer divide
428 ;; TODO: assume the median latency; blocks other divisions
;; NOTE(review): the reservation below holds pipe C for a single cycle,
;; so back-to-back divisions are not modelled as blocking each other --
;; presumably that is the part still "TODO"; confirm.
429 (define_insn_reservation "exynos_m1_div" 13
430 (and (eq_attr "tune" "exynosm1")
431 (eq_attr "type" "udiv, sdiv"))
432 "em1_c")
433
434 ;; Load-store execution Unit
435 ;;
;; Integer loads and stores.  The multi-word forms ("* 3") keep the load
;; AGU, or both store units, reserved for three consecutive cycles.
436 ;; Loads of up to 2 words.
437 (define_insn_reservation "exynos_m1_load" 4
438 (and (eq_attr "tune" "exynosm1")
439 (eq_attr "type" "load_byte, load1, load2"))
440 "em1_ld")
441
442 ;; Loads of 3 or 4 words.
443 (define_insn_reservation "exynos_m1_loadm" 6
444 (and (eq_attr "tune" "exynosm1")
445 (eq_attr "type" "load3, load4"))
446 "(em1_ld * 3)")
447
448 ;; Stores of up to 2 words.
449 (define_insn_reservation "exynos_m1_store" 1
450 (and (eq_attr "tune" "exynosm1")
451 (eq_attr "type" "store1, store2"))
452 "em1_st")
453
454 ;; Stores of 3 or 4 words.
455 (define_insn_reservation "exynos_m1_storem" 3
456 (and (eq_attr "tune" "exynosm1")
457 (eq_attr "type" "store3, store4"))
458 "(em1_st * 3)")
459
460 ;; Advanced SIMD Unit
461 ;;
462 ;; Integer Arithmetic Instructions.
;; These match on the "exynos_m1_neon_type" classes rather than on "type".
;; Simple and complex arithmetic are tied to F0 (em1_nmisc); basic
;; arithmetic may use either Neon pipe (em1_nalu).
;; NOTE(review): "exynos_m1_arith_simple" lacks the "neon_" infix that its
;; two siblings carry; it is still covered by the "exynos_m1_*" bypass glob
;; but is not named in any explicit bypass list.
463
464 (define_insn_reservation "exynos_m1_arith_simple" 1
465 (and (eq_attr "tune" "exynosm1")
466 (eq_attr "exynos_m1_neon_type" "neon_arith_simple"))
467 "em1_nmisc")
468
469 (define_insn_reservation "exynos_m1_neon_arith_basic" 2
470 (and (eq_attr "tune" "exynosm1")
471 (eq_attr "exynos_m1_neon_type" "neon_arith_basic"))
472 "em1_nalu")
473
474 (define_insn_reservation "exynos_m1_neon_arith_complex" 3
475 (and (eq_attr "tune" "exynosm1")
476 (eq_attr "exynos_m1_neon_type" "neon_arith_complex"))
477 "em1_nmisc")
478
479 ;; Integer Multiply Instructions.
;; One reservation covers plain multiplies, multiply-accumulates and
;; saturating multiply-accumulate-long: latency 4 on F0 (em1_nmisc).
480
481 (define_insn_reservation "exynos_m1_neon_multiply" 4
482 (and (eq_attr "tune" "exynosm1")
483 (eq_attr "exynos_m1_neon_type"
484 "neon_multiply, neon_mla, neon_sat_mla_long"))
485 "em1_nmisc")
486
487 ;; Integer Shift Instructions.
;; Basic shifts (immediate or register) take 2 cycles on either Neon pipe;
;; accumulating and complex (saturating) shifts take 4 cycles and are
;; tied to F1 (em1_nalu1).
488
489 (define_insn_reservation
490 "exynos_m1_neon_shift_acc" 4
491 (and (eq_attr "tune" "exynosm1")
492 (eq_attr "exynos_m1_neon_type" "neon_shift_acc"))
493 "em1_nalu1")
494
495 (define_insn_reservation
496 "exynos_m1_neon_shift_basic" 2
497 (and (eq_attr "tune" "exynosm1")
498 (eq_attr "exynos_m1_neon_type"
499 "neon_shift_imm_basic, neon_shift_reg_basic"))
500 "em1_nalu")
501
502 (define_insn_reservation
503 "exynos_m1_neon_shift_complex" 4
504 (and (eq_attr "tune" "exynosm1")
505 (eq_attr "exynos_m1_neon_type"
506 "neon_shift_imm_complex, neon_shift_reg_complex"))
507 "em1_nalu1")
508
509 ;; Floating Point Instructions.
;; Vector FP reservations, keyed on the "exynos_m1_neon_type" classes.
;; Several of these (add, mul, mla, step, unary) are also named in the
;; define_bypass forms at the end of the file, which shorten the
;; latencies given here for specific producer/consumer pairs.
510
511 (define_insn_reservation
512 "exynos_m1_neon_fp_unary" 2
513 (and (eq_attr "tune" "exynosm1")
514 (eq_attr "exynos_m1_neon_type" "neon_fp_unary"))
515 "em1_nalu")
516
517 (define_insn_reservation
518 "exynos_m1_neon_fp_add" 4
519 (and (eq_attr "tune" "exynosm1")
520 (eq_attr "exynos_m1_neon_type" "neon_fp_add"))
521 "em1_fadd")
522
523 (define_insn_reservation
524 "exynos_m1_neon_fp_abd" 3
525 (and (eq_attr "tune" "exynosm1")
526 (eq_attr "exynos_m1_neon_type" "neon_fp_abd"))
527 "em1_nmisc")
528
;; The neon_fp_compare class also contains the vector FP min/max types.
529 (define_insn_reservation
530 "exynos_m1_neon_fp_compare" 1
531 (and (eq_attr "tune" "exynosm1")
532 (eq_attr "exynos_m1_neon_type" "neon_fp_compare"))
533 "em1_nmisc")
534
535 ;; TODO: the latency and throughput of reduce insns actually varies between
536 ;; 3-5 and 1/4-1, but picked the median values.
;; NOTE(review): the values used below (latency 5, F0 held for 4 cycles)
;; are the upper bounds of those ranges rather than their medians --
;; confirm which was intended.  Despite its generic name, this
;; reservation covers only the min/max reductions.
537 (define_insn_reservation
538 "exynos_m1_neon_fp_reduc" 5
539 (and (eq_attr "tune" "exynosm1")
540 (eq_attr "exynos_m1_neon_type" "neon_fp_reduc_minmax"))
541 "(em1_nmisc * 4)")
542
;; Add reductions: a Neon pipe in each of two cycles, then the FP adder.
543 (define_insn_reservation
544 "exynos_m1_neon_fp_reduc_add" 10
545 (and (eq_attr "tune" "exynosm1")
546 (eq_attr "exynos_m1_neon_type" "neon_fp_reduc_add"))
547 "((em1_nalu * 2), em1_fadd)")
548
549 (define_insn_reservation
550 "exynos_m1_neon_fp_round" 4
551 (and (eq_attr "tune" "exynosm1")
552 (eq_attr "exynos_m1_neon_type" "neon_fp_round"))
553 "em1_fcvt")
554
555 (define_insn_reservation
556 "exynos_m1_neon_fp_cvt" 4
557 (and (eq_attr "tune" "exynosm1")
558 (eq_attr "exynos_m1_neon_type" "neon_fp_cvt"))
559 "em1_fcvt")
560
561 (define_insn_reservation
562 "exynos_m1_neon_fp_mul" 5
563 (and (eq_attr "tune" "exynosm1")
564 (eq_attr "exynos_m1_neon_type" "neon_fp_mul"))
565 "em1_fmac")
566
567 (define_insn_reservation
568 "exynos_m1_neon_fp_mla" 6
569 (and (eq_attr "tune" "exynosm1")
570 (eq_attr "exynos_m1_neon_type" "neon_fp_mla"))
571 "em1_fmac")
572
573 (define_insn_reservation
574 "exynos_m1_neon_fp_estimate" 5
575 (and (eq_attr "tune" "exynosm1")
576 (eq_attr "exynos_m1_neon_type" "neon_fp_estimate"))
577 "em1_fcvt")
578
579 (define_insn_reservation
580 "exynos_m1_neon_fp_estimatex" 1
581 (and (eq_attr "tune" "exynosm1")
582 (eq_attr "exynos_m1_neon_type" "neon_fp_estimatex"))
583 "em1_nmisc")
584
585 (define_insn_reservation
586 "exynos_m1_neon_fp_step" 6
587 (and (eq_attr "tune" "exynosm1")
588 (eq_attr "exynos_m1_neon_type" "neon_fp_step"))
589 "em1_fmac")
590
591 ;; Miscellaneous Instructions.
;; Bit operations, permutes, table lookups and transfers between the
;; general-purpose and Neon/FP register files.
592
593 (define_insn_reservation
594 "exynos_m1_neon_bitops" 2
595 (and (eq_attr "tune" "exynosm1")
596 (eq_attr "exynos_m1_neon_type" "neon_bitops"))
597 "em1_nalu")
598
;; The "_q" permutes (ext, permute, zip) reserve a Neon pipe in each of
;; two consecutive cycles.
599 (define_insn_reservation
600 "exynos_m1_neon_bitops_q" 3
601 (and (eq_attr "tune" "exynosm1")
602 (eq_attr "exynos_m1_neon_type" "neon_bitops_q"))
603 "(em1_nalu, em1_nalu)")
604
605 (define_insn_reservation
606 "exynos_m1_neon_bitins" 2
607 (and (eq_attr "tune" "exynosm1")
608 (eq_attr "exynos_m1_neon_type" "neon_bitins"))
609 "em1_nalu1")
610
611 ;; TODO: it is more complicated than this.
612 (define_insn_reservation
613 "exynos_m1_neon_tbl" 2
614 (and (eq_attr "tune" "exynosm1")
615 (eq_attr "exynos_m1_neon_type" "neon_tbl"))
616 "em1_nalu1")
617
;; The neon_from_gp class also covers f_mcr/f_mcrr and reserves the
;; store units; the neon_to_gp class also covers f_mrc/f_mrrc and
;; reserves F1 together with the load AGU.
618 (define_insn_reservation
619 "exynos_m1_neon_from_gp" 4
620 (and (eq_attr "tune" "exynosm1")
621 (eq_attr "exynos_m1_neon_type" "neon_from_gp"))
622 "em1_st")
623
624 (define_insn_reservation
625 "exynos_m1_neon_to_gp" 9
626 (and (eq_attr "tune" "exynosm1")
627 (eq_attr "exynos_m1_neon_type" "neon_to_gp"))
628 "em1_lfst")
629
630 ;; Load Instructions.
;; The first two reservations match directly on "type" (scalar FP loads
;; and load-pair); the remaining ones match the "exynos_m1_neon_type"
;; load classes.  "(em1_ld * N)" keeps the load AGU reserved for N
;; consecutive cycles.  The single-lane ("_one") forms additionally
;; reserve a Neon pipe for one or more cycles after the AGU cycles.
631
632 (define_insn_reservation
633 "exynos_m1_neon_load" 5
634 (and (eq_attr "tune" "exynosm1")
635 (eq_attr "type" "f_loads, f_loadd, neon_ldp"))
636 "em1_ld")
637
638 (define_insn_reservation
639 "exynos_m1_neon_load_q" 6
640 (and (eq_attr "tune" "exynosm1")
641 (eq_attr "type" "neon_ldp_q"))
642 "(em1_ld, em1_ld)")
643
;; Also used for the LD1 all-lanes class.
644 (define_insn_reservation
645 "exynos_m1_neon_load1_1" 6
646 (and (eq_attr "tune" "exynosm1")
647 (eq_attr "exynos_m1_neon_type" "neon_load1_1, neon_load1_all"))
648 "em1_ld")
649
650 (define_insn_reservation
651 "exynos_m1_neon_load1_2" 6
652 (and (eq_attr "tune" "exynosm1")
653 (eq_attr "exynos_m1_neon_type" "neon_load1_2"))
654 "(em1_ld * 2)")
655
656 (define_insn_reservation
657 "exynos_m1_neon_load1_3" 7
658 (and (eq_attr "tune" "exynosm1")
659 (eq_attr "exynos_m1_neon_type" "neon_load1_3"))
660 "(em1_ld * 3)")
661
662 (define_insn_reservation
663 "exynos_m1_neon_load1_4" 8
664 (and (eq_attr "tune" "exynosm1")
665 (eq_attr "exynos_m1_neon_type" "neon_load1_4"))
666 "(em1_ld * 4)")
667
668 (define_insn_reservation
669 "exynos_m1_neon_load1_one" 7
670 (and (eq_attr "tune" "exynosm1")
671 (eq_attr "exynos_m1_neon_type" "neon_load1_one"))
672 "((em1_ld * 2), em1_nalu)")
673
674 (define_insn_reservation
675 "exynos_m1_neon_load2_2" 10
676 (and (eq_attr "tune" "exynosm1")
677 (eq_attr "exynos_m1_neon_type" "neon_load2_2"))
678 "(em1_ld * 5)")
679
680 (define_insn_reservation
681 "exynos_m1_neon_load2_one" 7
682 (and (eq_attr "tune" "exynosm1")
683 (eq_attr "exynos_m1_neon_type" "neon_load2_one"))
684 "((em1_ld * 2), (em1_nalu * 2))")
685
686 (define_insn_reservation
687 "exynos_m1_neon_load2_all" 6
688 (and (eq_attr "tune" "exynosm1")
689 (eq_attr "exynos_m1_neon_type" "neon_load2_all"))
690 "(em1_ld * 2)")
691
692 (define_insn_reservation
693 "exynos_m1_neon_load3_3" 12
694 (and (eq_attr "tune" "exynosm1")
695 (eq_attr "exynos_m1_neon_type" "neon_load3_3"))
696 "(em1_ld * 6)")
697
698 (define_insn_reservation
699 "exynos_m1_neon_load3_one" 9
700 (and (eq_attr "tune" "exynosm1")
701 (eq_attr "exynos_m1_neon_type" "neon_load3_one"))
702 "((em1_ld * 4), (em1_nalu * 3))")
703
704 (define_insn_reservation
705 "exynos_m1_neon_load3_all" 7
706 (and (eq_attr "tune" "exynosm1")
707 (eq_attr "exynos_m1_neon_type" "neon_load3_all"))
708 "(em1_ld * 3)")
709
710 (define_insn_reservation
711 "exynos_m1_neon_load4_4" 14
712 (and (eq_attr "tune" "exynosm1")
713 (eq_attr "exynos_m1_neon_type" "neon_load4_4"))
714 "(em1_ld * 7)")
715
716 (define_insn_reservation
717 "exynos_m1_neon_load4_one" 9
718 (and (eq_attr "tune" "exynosm1")
719 (eq_attr "exynos_m1_neon_type" "neon_load4_one"))
720 "((em1_ld * 4), (em1_nalu * 4))")
721
722 (define_insn_reservation
723 "exynos_m1_neon_load4_all" 8
724 (and (eq_attr "tune" "exynosm1")
725 (eq_attr "exynos_m1_neon_type" "neon_load4_all"))
726 "(em1_ld * 4)")
727
728 ;; Store Instructions.
;; Two reservation shapes are used below.  "(em1_fst, em1_st)" reserves F1
;; in one cycle and the store units in the next, whereas "em1_sfst"
;; (defined above as em1_fst + em1_st) reserves them in the same cycle.
;; The ST2/ST3/ST4 reservations each cover both the multi-register and
;; the single-lane class.
729
;; The neon_store class covers scalar FP stores and store-pair.
730 (define_insn_reservation
731 "exynos_m1_neon_store" 1
732 (and (eq_attr "tune" "exynosm1")
733 (eq_attr "exynos_m1_neon_type" "neon_store"))
734 "(em1_fst, em1_st)")
735
736 (define_insn_reservation
737 "exynos_m1_neon_store1_1" 1
738 (and (eq_attr "tune" "exynosm1")
739 (eq_attr "exynos_m1_neon_type" "neon_store1_1"))
740 "em1_sfst")
741
742 (define_insn_reservation
743 "exynos_m1_neon_store1_2" 2
744 (and (eq_attr "tune" "exynosm1")
745 (eq_attr "exynos_m1_neon_type" "neon_store1_2"))
746 "(em1_sfst * 2)")
747
748 (define_insn_reservation
749 "exynos_m1_neon_store1_3" 3
750 (and (eq_attr "tune" "exynosm1")
751 (eq_attr "exynos_m1_neon_type" "neon_store1_3"))
752 "(em1_sfst * 3)")
753
754 (define_insn_reservation
755 "exynos_m1_neon_store1_4" 4
756 (and (eq_attr "tune" "exynosm1")
757 (eq_attr "exynos_m1_neon_type" "neon_store1_4"))
758 "(em1_sfst * 4)")
759
760 (define_insn_reservation
761 "exynos_m1_neon_store1_one" 7
762 (and (eq_attr "tune" "exynosm1")
763 (eq_attr "exynos_m1_neon_type" "neon_store1_one"))
764 "(em1_fst, em1_st)")
765
766 (define_insn_reservation
767 "exynos_m1_neon_store2" 7
768 (and (eq_attr "tune" "exynosm1")
769 (eq_attr "exynos_m1_neon_type" "neon_store2_2, neon_store2_one"))
770 "em1_sfst, em1_fst")
771
772 (define_insn_reservation
773 "exynos_m1_neon_store3" 16
774 (and (eq_attr "tune" "exynosm1")
775 (eq_attr "exynos_m1_neon_type" "neon_store3_3, neon_store3_one"))
776 "((em1_sfst * 3), (em1_fst * 2), em1_nalu)")
777
778 (define_insn_reservation
779 "exynos_m1_neon_store4" 17
780 (and (eq_attr "tune" "exynosm1")
781 (eq_attr "exynos_m1_neon_type" "neon_store4_4, neon_store4_one"))
782 "((em1_sfst * 4), (em1_fst * 2), em1_nalu)")
783
784 ;; Floating-Point Operations.
;; Scalar FP reservations, matched directly on "type".  Latencies for
;; add/mul/mac mirror the vector reservations of the same kind (4/5/6).
785
786 (define_insn_reservation "exynos_m1_fp_const" 2
787 (and (eq_attr "tune" "exynosm1")
788 (eq_attr "type" "fconsts, fconstd"))
789 "em1_nalu")
790
791 (define_insn_reservation "exynos_m1_fp_add" 4
792 (and (eq_attr "tune" "exynosm1")
793 (eq_attr "type" "fadds, faddd"))
794 "em1_fadd")
795
796 (define_insn_reservation "exynos_m1_fp_mul" 5
797 (and (eq_attr "tune" "exynosm1")
798 (eq_attr "type" "fmuls, fmuld"))
799 "em1_fmac")
800
801 (define_insn_reservation "exynos_m1_fp_mac" 6
802 (and (eq_attr "tune" "exynosm1")
803 (eq_attr "type" "fmacs, ffmas, fmacd, ffmad"))
804 "em1_fmac")
805
806 (define_insn_reservation "exynos_m1_fp_cvt" 4
807 (and (eq_attr "tune" "exynosm1")
808 (eq_attr "type" "f_cvt, f_rints, f_rintd"))
809 "em1_fcvt")
810
;; FP-to-integer: the converter (F0) first, then F1 with the load AGU.
811 (define_insn_reservation "exynos_m1_fp_cvt_i" 13
812 (and (eq_attr "tune" "exynosm1")
813 (eq_attr "type" "f_cvtf2i"))
814 "(em1_fcvt, em1_lfst)")
815
;; Integer-to-FP: the store units first, then the converter (F0).
816 (define_insn_reservation "exynos_m1_i_cvt_fp" 9
817 (and (eq_attr "tune" "exynosm1")
818 (eq_attr "type" "f_cvti2f"))
819 "(em1_st, em1_fcvt)")
820
821 (define_insn_reservation "exynos_m1_fp_cmp" 4
822 (and (eq_attr "tune" "exynosm1")
823 (eq_attr "type" "fcmps, fcmpd"))
824 "em1_nmisc")
825
;; FP select reserves the store units and F0 in the same cycle.
826 (define_insn_reservation "exynos_m1_fp_sel" 4
827 (and (eq_attr "tune" "exynosm1")
828 (eq_attr "type" "fcsel"))
829 "(em1_st + em1_nalu0)")
830
831 (define_insn_reservation "exynos_m1_fp_arith" 2
832 (and (eq_attr "tune" "exynosm1")
833 (eq_attr "type" "ffariths, ffarithd"))
834 "em1_nalu")
835
836 (define_insn_reservation "exynos_m1_fp_cpy" 2
837 (and (eq_attr "tune" "exynosm1")
838 (eq_attr "type" "fmov"))
839 "em1_nalu")
840
;; Divide and square root, scalar and vector alike.  Both reservations
;; keep em1_fvar (F1) reserved for nine cycles; only the latency differs
;; between single (15) and double (22) precision.
841 (define_insn_reservation "exynos_m1_fp_divs" 15
842 (and (eq_attr "tune" "exynosm1")
843 (eq_attr "type" "fdivs, neon_fp_div_s, neon_fp_div_s_q,\
844 fsqrts, neon_fp_sqrt_s, neon_fp_sqrt_s_q"))
845 "(em1_fvar * 9)")
846
847 (define_insn_reservation "exynos_m1_fp_divd" 22
848 (and (eq_attr "tune" "exynosm1")
849 (eq_attr "type" "fdivd, neon_fp_div_d, neon_fp_div_d_q,\
850 fsqrtd, neon_fp_sqrt_d, neon_fp_sqrt_d_q"))
851 "(em1_fvar * 9)")
852
853 (define_insn_reservation "exynos_m1_fp_minmax" 2
854 (and (eq_attr "tune" "exynosm1")
855 (eq_attr "type" "f_minmaxs, f_minmaxd"))
856 "(em1_nmisc * 2)")
857
858 ;; Crypto Operations.
;; All crypto reservations use em1_ncrypt (F0).  The "poly" reservations
;; match the neon_mul_*_long types directly on "type"; those types are
;; not part of any "exynos_m1_neon_type" class.  CRC is the exception in
;; this section: it runs on integer pipe C.
859
860 (define_insn_reservation "exynos_m1_crypto_simple" 2
861 (and (eq_attr "tune" "exynosm1")
862 (eq_attr "type" "crypto_aese, crypto_aesmc,\
863 crypto_sha1_xor, crypto_sha1_fast, crypto_sha256_fast"))
864 "em1_ncrypt")
865
866 (define_insn_reservation "exynos_m1_crypto_complex" 6
867 (and (eq_attr "tune" "exynosm1")
868 (eq_attr "type" "crypto_sha1_slow, crypto_sha256_slow"))
869 "em1_ncrypt")
870
871 (define_insn_reservation "exynos_m1_crypto_poly" 2
872 (and (eq_attr "tune" "exynosm1")
873 (eq_attr "type" "neon_mul_b_long, neon_mul_h_long, neon_mul_s_long"))
874 "em1_ncrypt")
875
876 (define_insn_reservation "exynos_m1_crypto_polyl" 4
877 (and (eq_attr "tune" "exynosm1")
878 (eq_attr "type" "neon_mul_d_long"))
879 "em1_ncrypt")
880
881 (define_insn_reservation "exynos_m1_crc" 2
882 (and (eq_attr "tune" "exynosm1")
883 (eq_attr "type" "crc"))
884 "em1_c")
885
886 ;; Simple execution unit bypasses
;; Each define_bypass gives the latency to use, instead of the producer's
;; default, when an insn of a reservation named in the second operand
;; feeds one named in the third.  A "*" in a name is a glob; in this file
;; "exynos_m1_store*" matches store and storem, "exynos_m1_mla*" matches
;; mla and mlal, "exynos_m1_neon_bitops*" matches bitops and bitops_q,
;; and "exynos_m1_crypto_poly*" matches crypto_poly and crypto_polyl.
887
888 ;; Pre-decrement and post-increment addressing modes update the register quickly.
889 ;; TODO: figure out how to tell the addressing mode register from the loaded one.
890 (define_bypass 1 "exynos_m1_store*" "exynos_m1_store*")
891
892 ;; MLAs can feed other MLAs quickly.
893 (define_bypass 1 "exynos_m1_mla*" "exynos_m1_mla*")
894
895 ;; Insns in FMAC or FADD can feed other such insns quickly.
;; In each case below the bypass latency is one cycle less than the
;; producer's default latency (mul 5->4, mac/mla/step 6->5, add 4->3).
;; Scalar and vector reservations are kept in separate bypass groups.
896 (define_bypass 4 "exynos_m1_fp_mul"
897 "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac")
898 (define_bypass 5 "exynos_m1_fp_mac"
899 "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac")
900 (define_bypass 4 "exynos_m1_neon_fp_mul"
901 "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\
902 exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step")
903 (define_bypass 5 "exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step"
904 "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\
905 exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step")
906
907 (define_bypass 3 "exynos_m1_fp_add"
908 "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac")
909 (define_bypass 3 "exynos_m1_neon_fp_add"
910 "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\
911 exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step")
912
913 ;; Insns in NALU can feed other such insns quickly.
;; Again one cycle is saved relative to the producer's default latency
;; (2->1 for the two-cycle producers, 4->3 for fp_sel, shift_acc and
;; shift_complex).
914 (define_bypass 1 "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy"
915 "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy,\
916 exynos_m1_fp_sel")
917 (define_bypass 3 "exynos_m1_fp_sel"
918 "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy,\
919 exynos_m1_fp_sel")
920 (define_bypass 1 "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\
921 exynos_m1_neon_bitops, exynos_m1_neon_bitins,\
922 exynos_m1_neon_tbl"
923 "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\
924 exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex,\
925 exynos_m1_neon_bitops*, exynos_m1_neon_bitins,\
926 exynos_m1_neon_tbl")
927 (define_bypass 3 "exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex"
928 "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\
929 exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex,\
930 exynos_m1_neon_bitops*, exynos_m1_neon_bitins,\
931 exynos_m1_neon_tbl")
932 (define_bypass 1 "exynos_m1_neon_fp_unary" "exynos_m1_neon_fp_unary")
933
934 ;; Insns in NCRYPT can feed other such insns quickly.
935 (define_bypass 1 "exynos_m1_crypto_simple, exynos_m1_crypto_poly"
936 "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\
937 exynos_m1_crypto_poly*")
938 (define_bypass 3 "exynos_m1_crypto_polyl"
939 "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\
940 exynos_m1_crypto_poly*")
941 (define_bypass 5 "exynos_m1_crypto_complex"
942 "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\
943 exynos_m1_crypto_poly*")
944
945 ;; Predicted branches take no time, but mispredicted ones take forever anyway.
;; The producer glob "exynos_m1_*" matches every reservation name used in
;; this file, so any insn feeding a call or branch is given latency 1.
946 (define_bypass 1 "exynos_m1_*"
947 "exynos_m1_call, exynos_m1_branch")