;; [thirdparty/gcc.git] / gcc / config / arm / cortex-r4.md

;; ARM Cortex-R4 scheduling description.
;; Copyright (C) 2007-2022 Free Software Foundation, Inc.
;; Contributed by CodeSourcery.

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.

;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Declare the "cortex_r4" automaton; every issue unit declared in this
;; file is attached to it.
(define_automaton "cortex_r4")

;; We approximate the dual-issue constraints of this core using four
;; "issue units" and a reservation matrix as follows.  The numbers indicate
;; the instruction groups' preferences in order.  Multiple entries for
;; the same numbered preference indicate units that must be reserved
;; together.
;;
;; Issue unit:		A	B	C	ALU
;;
;; ALU w/o reg shift	1st	2nd		1st and 2nd
;; ALU w/ reg shift	1st	2nd	2nd	1st and 2nd
;; Moves		1st	2nd		2nd
;; Multiplication	1st			1st
;; Division		1st			1st
;; Load/store single	1st		1st
;; Other load/store	1st	1st
;; Branches			1st

;; The four issue units (A, B, C and ALU) of the reservation matrix,
;; all belonging to the "cortex_r4" automaton.
(define_cpu_unit "cortex_r4_issue_a,cortex_r4_issue_b,\
                  cortex_r4_issue_c,cortex_r4_issue_alu"
                 "cortex_r4")

;; Issue slots for ALU instructions without a register shift: issue
;; units A and ALU together or, as the second preference, issue units
;; B and ALU together.  The order of the alternatives is significant.
(define_reservation "cortex_r4_alu"
                    "(cortex_r4_issue_a+cortex_r4_issue_alu)|\
                     (cortex_r4_issue_b+cortex_r4_issue_alu)")
;; Issue slots for ALU instructions with a register shift: issue units
;; A and ALU together or, as the second preference, issue units B, C and
;; ALU all together.
(define_reservation "cortex_r4_alu_shift_reg"
                    "(cortex_r4_issue_a+cortex_r4_issue_alu)|\
                     (cortex_r4_issue_b+cortex_r4_issue_c+\
                      cortex_r4_issue_alu)")
;; Issue slots for moves: issue unit A on its own or, as the second
;; preference, issue units B and ALU together.
(define_reservation "cortex_r4_mov"
                    "cortex_r4_issue_a|(cortex_r4_issue_b+\
                     cortex_r4_issue_alu)")
;; Multiplies hold issue units A and ALU together: for one cycle
;; ("cortex_r4_mul") or for two consecutive cycles ("cortex_r4_mul_2").
(define_reservation "cortex_r4_mul"
                    "cortex_r4_issue_a+cortex_r4_issue_alu")
(define_reservation "cortex_r4_mul_2" "cortex_r4_mul*2")
;; Division instructions execute out-of-order with respect to the
;; rest of the pipeline and only require reservations on their first and
;; final cycles.
;; Each reservation below holds issue units A and ALU for one cycle,
;; reserves nothing for the intervening cycles, then holds A and ALU
;; again for one cycle: 1+7+1 = 9 cycles and 1+8+1 = 10 cycles.
(define_reservation "cortex_r4_div_9"
                    "cortex_r4_issue_a+cortex_r4_issue_alu,\
                     nothing*7,\
                     cortex_r4_issue_a+cortex_r4_issue_alu")
(define_reservation "cortex_r4_div_10"
                    "cortex_r4_issue_a+cortex_r4_issue_alu,\
                     nothing*8,\
                     cortex_r4_issue_a+cortex_r4_issue_alu")
;; A single load/store holds issue units A and C together for one cycle.
(define_reservation "cortex_r4_load_store"
                    "cortex_r4_issue_a+cortex_r4_issue_c")
;; Other loads/stores hold issue units A and B together for two
;; consecutive cycles.
(define_reservation "cortex_r4_load_store_2"
                    "(cortex_r4_issue_a+cortex_r4_issue_b)*2")
;; Branches need issue unit B only.
(define_reservation "cortex_r4_branch" "cortex_r4_issue_b")

;; We assume that all instructions are unconditional.

;; Data-processing instructions.  Plain moves (no shift) have their own
;; reservation so that they can follow the "Moves" row of the dual-issue
;; matrix above.  Latency is two cycles.
(define_insn_reservation "cortex_r4_alu" 2
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type"
                "alu_imm,alus_imm,alu_sreg,alus_sreg,\
                 logic_imm,logics_imm,logic_reg,logics_reg,\
                 adc_imm,adcs_imm,adc_reg,adcs_reg,\
                 shift_imm,shift_reg,mvn_imm,mvn_reg,\
                 adr,bfm,clz,rbit,rev"))
  "cortex_r4_alu")

;; Immediate and register moves: two-cycle latency, issued through the
;; "cortex_r4_mov" reservation.
(define_insn_reservation "cortex_r4_mov" 2
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type"
                "mov_reg,mov_imm"))
  "cortex_r4_mov")

;; Data-processing instructions with an immediate shift (plus extends).
;; They use the same issue reservation as unshifted ALU instructions.
(define_insn_reservation "cortex_r4_alu_shift" 2
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type"
                "alu_shift_imm_lsl_1to4,alu_shift_imm_other,\
                 alus_shift_imm,logic_shift_imm,logics_shift_imm,\
                 mov_shift,mvn_shift,extend"))
  "cortex_r4_alu")

;; Data-processing instructions with a register-specified shift.  The
;; "mrs" and "multiple" types are scheduled with this group as well.
(define_insn_reservation "cortex_r4_alu_shift_reg" 2
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type"
                "alu_shift_reg,alus_shift_reg,\
                 logic_shift_reg,logics_shift_reg,\
                 mov_shift_reg,mvn_shift_reg,\
                 multiple,mrs"))
  "cortex_r4_alu_shift_reg")

;; Back-to-back data processing: a result from any ALU-class producer
;; (including a mov) reaches a following ALU instruction after one cycle
;; instead of two.  For shifted consumers the bypass applies only when
;; the named guard function accepts the dependency (no early dep).
(define_bypass 1
  "cortex_r4_mov,cortex_r4_alu,\
   cortex_r4_alu_shift,cortex_r4_alu_shift_reg"
  "cortex_r4_alu")
(define_bypass 1
  "cortex_r4_mov,cortex_r4_alu,\
   cortex_r4_alu_shift,cortex_r4_alu_shift_reg"
  "cortex_r4_alu_shift"
  "arm_no_early_alu_shift_dep")
(define_bypass 1
  "cortex_r4_mov,cortex_r4_alu,\
   cortex_r4_alu_shift,cortex_r4_alu_shift_reg"
  "cortex_r4_alu_shift_reg"
  "arm_no_early_alu_shift_value_dep")

;; Judged purely by result availability, a consumer mov could issue in
;; the same cycle as the ALU instruction producing its input.  That
;; never happens in practice: mov;add (in that order) is not a
;; dual-issue pair, and dual issue is disallowed whenever a dependency
;; exists.  The latency is therefore recorded as one; the same applies
;; to a mov feeding another mov.
(define_bypass 1
  "cortex_r4_mov,cortex_r4_alu,\
   cortex_r4_alu_shift,cortex_r4_alu_shift_reg"
  "cortex_r4_mov")

;; qadd, qdadd, qsub and qdsub are not currently emitted, and neither are
;; media data processing instructions nor sad instructions.

;; Multiplication instructions.

;; Four-cycle multiplies; they hold issue units A and ALU for two cycles.
(define_insn_reservation "cortex_r4_mul_4" 4
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type"
                "smmul,mul"))
  "cortex_r4_mul_2")

;; Three-cycle multiplies; they hold issue units A and ALU for one cycle.
(define_insn_reservation "cortex_r4_mul_3" 3
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type"
                "smuad,smusd,smulxy,smulwy"))
  "cortex_r4_mul")

;; Four-cycle multiply-accumulates; issue units A and ALU for two cycles.
(define_insn_reservation "cortex_r4_mla_4" 4
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type"
                "smmla,mla"))
  "cortex_r4_mul_2")

;; Three-cycle multiply-accumulates; issue units A and ALU for one cycle.
(define_insn_reservation "cortex_r4_mla_3" 3
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type"
                "smlad,smlsd,smlaxy,smlawy"))
  "cortex_r4_mul")

;; smlald/smlsld: three-cycle latency, issue units A and ALU for one
;; cycle.
(define_insn_reservation "cortex_r4_smlald" 3
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type"
                "smlsld,smlald"))
  "cortex_r4_mul")

;; Long multiplies: four-cycle latency, issue units A and ALU for two
;; cycles.
(define_insn_reservation "cortex_r4_mull" 4
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type"
                "umull,smull,umlal,umaal"))
  "cortex_r4_mul_2")

;; Accumulator forwarding: when a multiply or MLA producing a
;; single-register result feeds the accumulator operand of a following
;; MLA, the result is forwarded and the latency drops by one cycle
;; (3 -> 2 for the three-cycle producers, 4 -> 3 for the four-cycle ones).
(define_bypass 2
  "cortex_r4_mla_3,cortex_r4_mul_3"
  "cortex_r4_mla_4,cortex_r4_mla_3"
  "arm_mac_accumulator_is_mul_result")

(define_bypass 3
  "cortex_r4_mla_4,cortex_r4_mul_4"
  "cortex_r4_mla_4,cortex_r4_mla_3"
  "arm_mac_accumulator_is_mul_result")

;; An ALU consumer that needs the multiply result only at the ALU stage
;; sees one cycle less latency than one that needs it at the Shift
;; stage.  Shifted consumers qualify only when their guard function
;; accepts the dependency.

;; Three-cycle producers: latency 2.
(define_bypass 2
  "cortex_r4_smlald,cortex_r4_mul_3,cortex_r4_mla_3"
  "cortex_r4_alu")
(define_bypass 2
  "cortex_r4_smlald,cortex_r4_mul_3,cortex_r4_mla_3"
  "cortex_r4_alu_shift"
  "arm_no_early_alu_shift_dep")
(define_bypass 2
  "cortex_r4_smlald,cortex_r4_mul_3,cortex_r4_mla_3"
  "cortex_r4_alu_shift_reg"
  "arm_no_early_alu_shift_value_dep")

;; Four-cycle producers: latency 3.
(define_bypass 3
  "cortex_r4_mull,cortex_r4_mul_4,cortex_r4_mla_4"
  "cortex_r4_alu")
(define_bypass 3
  "cortex_r4_mull,cortex_r4_mul_4,cortex_r4_mla_4"
  "cortex_r4_alu_shift"
  "arm_no_early_alu_shift_dep")
(define_bypass 3
  "cortex_r4_mull,cortex_r4_mul_4,cortex_r4_mla_4"
  "cortex_r4_alu_shift_reg"
  "arm_no_early_alu_shift_value_dep")

;; When the consumer is a mov, the multiply latency drops by a further
;; cycle: 1 for the three-cycle producers, 2 for the four-cycle ones.
(define_bypass 1
  "cortex_r4_smlald,cortex_r4_mul_3,cortex_r4_mla_3"
  "cortex_r4_mov")
(define_bypass 2
  "cortex_r4_mull,cortex_r4_mul_4,cortex_r4_mla_4"
  "cortex_r4_mov")

;; We guess that division of A/B using sdiv or udiv, on average,
;; is performed with B having ten more leading zeros than A.
;; This gives a latency of nine for udiv and ten for sdiv.
;; Each latency matches the cycle count of the reservation it uses
;; ("cortex_r4_div_9" and "cortex_r4_div_10" respectively).
(define_insn_reservation "cortex_r4_udiv" 9
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type" "udiv"))
  "cortex_r4_div_9")

(define_insn_reservation "cortex_r4_sdiv" 10
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type" "sdiv"))
  "cortex_r4_div_10")

;; Branches.  We assume correct prediction.
;; Hence the latency of zero; the branch only occupies the issue unit
;; named by the "cortex_r4_branch" reservation.

(define_insn_reservation "cortex_r4_branch" 0
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type" "branch"))
  "cortex_r4_branch")

;; Call latencies are not predictable.  A semi-arbitrary very large
;; number is used as "positive infinity" so that everything should be
;; finished by the time of return.
;; The reservation is "nothing", so a call does not occupy any of the
;; issue units.
(define_insn_reservation "cortex_r4_call" 32
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type" "call"))
  "nothing")

;; Status register access instructions are not currently emitted.

;; Load instructions.
;; We do not model the "addr_md_3cycle" cases and assume that
;; accesses following are correctly aligned.

;; Loads of one or two words: three-cycle latency, using the
;; single load/store issue reservation.
(define_insn_reservation "cortex_r4_load_1_2" 3
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type"
                "load_8,load_4"))
  "cortex_r4_load_store")

;; Loads of three or four words: four-cycle latency, using the
;; two-cycle load/store issue reservation.
(define_insn_reservation "cortex_r4_load_3_4" 4
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type"
                "load_16,load_12"))
  "cortex_r4_load_store_2")

;; A consumer that needs the loaded value only as a Normal Reg sees one
;; cycle less than the full load latency.  Shifted consumers qualify
;; only when their guard function accepts the dependency.

;; One- and two-word loads: 3 -> 2.
(define_bypass 2 "cortex_r4_load_1_2" "cortex_r4_alu")
(define_bypass 2 "cortex_r4_load_1_2" "cortex_r4_alu_shift"
  "arm_no_early_alu_shift_dep")
(define_bypass 2 "cortex_r4_load_1_2" "cortex_r4_alu_shift_reg"
  "arm_no_early_alu_shift_value_dep")

;; Three- and four-word loads: 4 -> 3.
(define_bypass 3 "cortex_r4_load_3_4" "cortex_r4_alu")
(define_bypass 3 "cortex_r4_load_3_4" "cortex_r4_alu_shift"
  "arm_no_early_alu_shift_dep")
(define_bypass 3 "cortex_r4_load_3_4" "cortex_r4_alu_shift_reg"
  "arm_no_early_alu_shift_value_dep")

;; Moves and stores consume the loaded value only as a Late Reg, which
;; removes two cycles from the load latency (3 -> 1 and 4 -> 2).

(define_bypass 1
  "cortex_r4_load_1_2"
  "cortex_r4_store_1_2,cortex_r4_store_3_4,cortex_r4_mov")
(define_bypass 2
  "cortex_r4_load_3_4"
  "cortex_r4_store_1_2,cortex_r4_store_3_4,cortex_r4_mov")

;; A result needed as the base or offset of a load costs one cycle more
;; than the producer's normal latency: 3 for the ALU class, 4 for the
;; three-cycle multiplies and 5 for the four-cycle multiplies.

(define_bypass 3
  "cortex_r4_mov,cortex_r4_alu,\
   cortex_r4_alu_shift,cortex_r4_alu_shift_reg"
  "cortex_r4_load_1_2,cortex_r4_load_3_4")

(define_bypass 4
  "cortex_r4_smlald,cortex_r4_mul_3,cortex_r4_mla_3"
  "cortex_r4_load_1_2,cortex_r4_load_3_4")

(define_bypass 5
  "cortex_r4_mull,cortex_r4_mul_4,cortex_r4_mla_4"
  "cortex_r4_load_1_2,cortex_r4_load_3_4")

;; Store instructions.

;; Stores of one or two words: zero latency, using the single
;; load/store issue reservation.
(define_insn_reservation "cortex_r4_store_1_2" 0
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type"
                "store_8,store_4"))
  "cortex_r4_load_store")

;; Stores of three or four words: zero latency, using the two-cycle
;; load/store issue reservation.
(define_insn_reservation "cortex_r4_store_3_4" 0
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type"
                "store_16,store_12"))
  "cortex_r4_load_store_2")
Commit	Line	Data
bd4dc3cd	1	;; ARM Cortex-R4 scheduling description.
7adcbafe	2	;; Copyright (C) 2007-2022 Free Software Foundation, Inc.
bd4dc3cd PB	3	;; Contributed by CodeSourcery.
	4
	5	;; This file is part of GCC.
	6
874101ec PB	7	;; GCC is free software; you can redistribute it and/or modify it
	8	;; under the terms of the GNU General Public License as published
	9	;; by the Free Software Foundation; either version 3, or (at your
	10	;; option) any later version.
	11
bd4dc3cd PB	12	;; GCC is distributed in the hope that it will be useful, but WITHOUT
	13	;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
	14	;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
	15	;; License for more details.
	16
	17	;; You should have received a copy of the GNU General Public License
874101ec PB	18	;; along with GCC; see the file COPYING3. If not see
874101ec PB	19	;; <http://www.gnu.org/licenses/>.
bd4dc3cd PB	20
	21	(define_automaton "cortex_r4")
	22
	23	;; We approximate the dual-issue constraints of this core using four
	24	;; "issue units" and a reservation matrix as follows. The numbers indicate
	25	;; the instruction groups' preferences in order. Multiple entries for
	26	;; the same numbered preference indicate units that must be reserved
	27	;; together.
	28	;;
	29	;; Issue unit: A B C ALU
	30	;;
	31	;; ALU w/o reg shift 1st 2nd 1st and 2nd
	32	;; ALU w/ reg shift 1st 2nd 2nd 1st and 2nd
	33	;; Moves 1st 2nd 2nd
	34	;; Multiplication 1st 1st
	35	;; Division 1st 1st
	36	;; Load/store single 1st 1st
	37	;; Other load/store 1st 1st
	38	;; Branches 1st
	39
	40	(define_cpu_unit "cortex_r4_issue_a" "cortex_r4")
	41	(define_cpu_unit "cortex_r4_issue_b" "cortex_r4")
	42	(define_cpu_unit "cortex_r4_issue_c" "cortex_r4")
	43	(define_cpu_unit "cortex_r4_issue_alu" "cortex_r4")
	44
	45	(define_reservation "cortex_r4_alu"
	46	"(cortex_r4_issue_a+cortex_r4_issue_alu)\|\
	47	(cortex_r4_issue_b+cortex_r4_issue_alu)")
	48	(define_reservation "cortex_r4_alu_shift_reg"
	49	"(cortex_r4_issue_a+cortex_r4_issue_alu)\|\
	50	(cortex_r4_issue_b+cortex_r4_issue_c+\
	51	cortex_r4_issue_alu)")
	52	(define_reservation "cortex_r4_mov"
	53	"cortex_r4_issue_a\|(cortex_r4_issue_b+\
	54	cortex_r4_issue_alu)")
	55	(define_reservation "cortex_r4_mul" "cortex_r4_issue_a+cortex_r4_issue_alu")
	56	(define_reservation "cortex_r4_mul_2"
	57	"(cortex_r4_issue_a+cortex_r4_issue_alu)*2")
	58	;; Division instructions execute out-of-order with respect to the
	59	;; rest of the pipeline and only require reservations on their first and
	60	;; final cycles.
	61	(define_reservation "cortex_r4_div_9"
	62	"cortex_r4_issue_a+cortex_r4_issue_alu,\
	63	nothing*7,\
	64	cortex_r4_issue_a+cortex_r4_issue_alu")
	65	(define_reservation "cortex_r4_div_10"
	66	"cortex_r4_issue_a+cortex_r4_issue_alu,\
	67	nothing*8,\
	68	cortex_r4_issue_a+cortex_r4_issue_alu")
	69	(define_reservation "cortex_r4_load_store"
	70	"cortex_r4_issue_a+cortex_r4_issue_c")
	71	(define_reservation "cortex_r4_load_store_2"
	72	"(cortex_r4_issue_a+cortex_r4_issue_b)*2")
	73	(define_reservation "cortex_r4_branch" "cortex_r4_issue_b")
	74
	75	;; We assume that all instructions are unconditional.
	76
	77	;; Data processing instructions. Moves without shifts are kept separate
	78	;; for the purposes of the dual-issue constraints above.
	79	(define_insn_reservation "cortex_r4_alu" 2
51c69ddb	80	(and (eq_attr "tune_cortexr4" "yes")
6e4150e1	81	(eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\
1d61feeb	82	alu_sreg,alus_sreg,logic_reg,logics_reg,\
6e4150e1	83	adc_imm,adcs_imm,adc_reg,adcs_reg,\
04ae06da	84	adr,bfm,clz,rbit,rev,\
6e4150e1	85	shift_imm,shift_reg,mvn_imm,mvn_reg"))
bd4dc3cd PB	86	"cortex_r4_alu")
	87
	88	(define_insn_reservation "cortex_r4_mov" 2
51c69ddb	89	(and (eq_attr "tune_cortexr4" "yes")
859abddd	90	(eq_attr "type" "mov_imm,mov_reg"))
bd4dc3cd PB	91	"cortex_r4_mov")
	92
	93	(define_insn_reservation "cortex_r4_alu_shift" 2
51c69ddb	94	(and (eq_attr "tune_cortexr4" "yes")
ae27ce51	95	(eq_attr "type" "alu_shift_imm_lsl_1to4,alu_shift_imm_other,alus_shift_imm,\
6e4150e1 JG	96	logic_shift_imm,logics_shift_imm,\
6e4150e1 JG	97	extend,mov_shift,mvn_shift"))
bd4dc3cd PB	98	"cortex_r4_alu")
	99
	100	(define_insn_reservation "cortex_r4_alu_shift_reg" 2
51c69ddb	101	(and (eq_attr "tune_cortexr4" "yes")
6e4150e1 JG	102	(eq_attr "type" "alu_shift_reg,alus_shift_reg,\
6e4150e1 JG	103	logic_shift_reg,logics_shift_reg,\
594726e4	104	mov_shift_reg,mvn_shift_reg,\
f62281dc	105	mrs,multiple"))
bd4dc3cd PB	106	"cortex_r4_alu_shift_reg")
	107
	108	;; An ALU instruction followed by an ALU instruction with no early dep.
	109	(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\
	110	cortex_r4_mov"
	111	"cortex_r4_alu")
	112	(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\
	113	cortex_r4_mov"
	114	"cortex_r4_alu_shift"
	115	"arm_no_early_alu_shift_dep")
	116	(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\
	117	cortex_r4_mov"
	118	"cortex_r4_alu_shift_reg"
	119	"arm_no_early_alu_shift_value_dep")
	120
	121	;; In terms of availabilities, a consumer mov could theoretically be
	122	;; issued together with a producer ALU instruction, without stalls.
	123	;; In practice this cannot happen because mov;add (in that order) is not
	124	;; eligible for dual issue and furthermore dual issue is not permitted
	125	;; when a dependency is involved. We therefore note it as latency one.
	126	;; A mov followed by another of the same is also latency one.
	127	(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\
	128	cortex_r4_mov"
	129	"cortex_r4_mov")
	130
	131	;; qadd, qdadd, qsub and qdsub are not currently emitted, and neither are
	132	;; media data processing instructions nor sad instructions.
	133
	134	;; Multiplication instructions.
	135
	136	(define_insn_reservation "cortex_r4_mul_4" 4
51c69ddb	137	(and (eq_attr "tune_cortexr4" "yes")
09485a08	138	(eq_attr "type" "mul,smmul"))
bd4dc3cd PB	139	"cortex_r4_mul_2")
	140
	141	(define_insn_reservation "cortex_r4_mul_3" 3
51c69ddb	142	(and (eq_attr "tune_cortexr4" "yes")
09485a08	143	(eq_attr "type" "smulxy,smulwy,smuad,smusd"))
bd4dc3cd PB	144	"cortex_r4_mul")
	145
	146	(define_insn_reservation "cortex_r4_mla_4" 4
51c69ddb	147	(and (eq_attr "tune_cortexr4" "yes")
09485a08	148	(eq_attr "type" "mla,smmla"))
bd4dc3cd PB	149	"cortex_r4_mul_2")
	150
	151	(define_insn_reservation "cortex_r4_mla_3" 3
51c69ddb	152	(and (eq_attr "tune_cortexr4" "yes")
09485a08	153	(eq_attr "type" "smlaxy,smlawy,smlad,smlsd"))
bd4dc3cd PB	154	"cortex_r4_mul")
	155
	156	(define_insn_reservation "cortex_r4_smlald" 3
51c69ddb	157	(and (eq_attr "tune_cortexr4" "yes")
09485a08	158	(eq_attr "type" "smlald,smlsld"))
bd4dc3cd PB	159	"cortex_r4_mul")
	160
	161	(define_insn_reservation "cortex_r4_mull" 4
51c69ddb	162	(and (eq_attr "tune_cortexr4" "yes")
09485a08	163	(eq_attr "type" "smull,umull,umlal,umaal"))
bd4dc3cd PB	164	"cortex_r4_mul_2")
	165
	166	;; A multiply or an MLA with a single-register result, followed by an
	167	;; MLA with an accumulator dependency, has its result forwarded.
	168	(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3"
	169	"cortex_r4_mla_3,cortex_r4_mla_4"
	170	"arm_mac_accumulator_is_mul_result")
	171
	172	(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4"
	173	"cortex_r4_mla_3,cortex_r4_mla_4"
	174	"arm_mac_accumulator_is_mul_result")
	175
	176	;; A multiply followed by an ALU instruction needing the multiply
	177	;; result only at ALU has lower latency than one needing it at Shift.
	178	(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
	179	"cortex_r4_alu")
	180	(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
	181	"cortex_r4_alu_shift"
	182	"arm_no_early_alu_shift_dep")
	183	(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
	184	"cortex_r4_alu_shift_reg"
	185	"arm_no_early_alu_shift_value_dep")
	186	(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
	187	"cortex_r4_alu")
	188	(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
	189	"cortex_r4_alu_shift"
	190	"arm_no_early_alu_shift_dep")
	191	(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
	192	"cortex_r4_alu_shift_reg"
	193	"arm_no_early_alu_shift_value_dep")
	194
	195	;; A multiply followed by a mov has one cycle lower latency again.
	196	(define_bypass 1 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
	197	"cortex_r4_mov")
	198	(define_bypass 2 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
	199	"cortex_r4_mov")
	200
	201	;; We guess that division of A/B using sdiv or udiv, on average,
	202	;; is performed with B having ten more leading zeros than A.
	203	;; This gives a latency of nine for udiv and ten for sdiv.
	204	(define_insn_reservation "cortex_r4_udiv" 9
51c69ddb	205	(and (eq_attr "tune_cortexr4" "yes")
09485a08	206	(eq_attr "type" "udiv"))
bd4dc3cd PB	207	"cortex_r4_div_9")
	208
	209	(define_insn_reservation "cortex_r4_sdiv" 10
51c69ddb	210	(and (eq_attr "tune_cortexr4" "yes")
09485a08	211	(eq_attr "type" "sdiv"))
bd4dc3cd PB	212	"cortex_r4_div_10")
	213
	214	;; Branches. We assume correct prediction.
	215
	216	(define_insn_reservation "cortex_r4_branch" 0
51c69ddb	217	(and (eq_attr "tune_cortexr4" "yes")
bd4dc3cd PB	218	(eq_attr "type" "branch"))
	219	"cortex_r4_branch")
	220
	221	;; Call latencies are not predictable. A semi-arbitrary very large
	222	;; number is used as "positive infinity" so that everything should be
	223	;; finished by the time of return.
	224	(define_insn_reservation "cortex_r4_call" 32
51c69ddb	225	(and (eq_attr "tune_cortexr4" "yes")
bd4dc3cd PB	226	(eq_attr "type" "call"))
	227	"nothing")
	228
	229	;; Status register access instructions are not currently emitted.
	230
	231	;; Load instructions.
	232	;; We do not model the "addr_md_3cycle" cases and assume that
	233	;; accesses following are correctly aligned.
	234
	235	(define_insn_reservation "cortex_r4_load_1_2" 3
51c69ddb	236	(and (eq_attr "tune_cortexr4" "yes")
89b2133e	237	(eq_attr "type" "load_4,load_8"))
bd4dc3cd PB	238	"cortex_r4_load_store")
	239
	240	(define_insn_reservation "cortex_r4_load_3_4" 4
51c69ddb	241	(and (eq_attr "tune_cortexr4" "yes")
89b2133e	242	(eq_attr "type" "load_12,load_16"))
bd4dc3cd PB	243	"cortex_r4_load_store_2")
	244
	245	;; If a producing load is followed by an instruction consuming only
	246	;; as a Normal Reg, there is one fewer cycle of latency.
	247
	248	(define_bypass 2 "cortex_r4_load_1_2"
	249	"cortex_r4_alu")
	250	(define_bypass 2 "cortex_r4_load_1_2"
	251	"cortex_r4_alu_shift"
	252	"arm_no_early_alu_shift_dep")
	253	(define_bypass 2 "cortex_r4_load_1_2"
	254	"cortex_r4_alu_shift_reg"
	255	"arm_no_early_alu_shift_value_dep")
	256
	257	(define_bypass 3 "cortex_r4_load_3_4"
	258	"cortex_r4_alu")
	259	(define_bypass 3 "cortex_r4_load_3_4"
	260	"cortex_r4_alu_shift"
	261	"arm_no_early_alu_shift_dep")
	262	(define_bypass 3 "cortex_r4_load_3_4"
	263	"cortex_r4_alu_shift_reg"
	264	"arm_no_early_alu_shift_value_dep")
	265
	266	;; If a producing load is followed by an instruction consuming only
	267	;; as a Late Reg, there are two fewer cycles of latency. Such consumer
	268	;; instructions are moves and stores.
	269
	270	(define_bypass 1 "cortex_r4_load_1_2"
	271	"cortex_r4_mov,cortex_r4_store_1_2,cortex_r4_store_3_4")
	272	(define_bypass 2 "cortex_r4_load_3_4"
	273	"cortex_r4_mov,cortex_r4_store_1_2,cortex_r4_store_3_4")
	274
	275	;; If a producer's result is required as the base or offset of a load,
	276	;; there is an extra cycle latency.
	277
	278	(define_bypass 3 "cortex_r4_alu,cortex_r4_mov,cortex_r4_alu_shift,\
	279	cortex_r4_alu_shift_reg"
	280	"cortex_r4_load_1_2,cortex_r4_load_3_4")
	281
	282	(define_bypass 4 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
	283	"cortex_r4_load_1_2,cortex_r4_load_3_4")
	284
	285	(define_bypass 5 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
	286	"cortex_r4_load_1_2,cortex_r4_load_3_4")
	287
	288	;; Store instructions.
	289
	290	(define_insn_reservation "cortex_r4_store_1_2" 0
51c69ddb	291	(and (eq_attr "tune_cortexr4" "yes")
89b2133e	292	(eq_attr "type" "store_4,store_8"))
bd4dc3cd PB	293	"cortex_r4_load_store")
	294
	295	(define_insn_reservation "cortex_r4_store_3_4" 0
51c69ddb	296	(and (eq_attr "tune_cortexr4" "yes")
89b2133e	297	(eq_attr "type" "store_12,store_16"))
bd4dc3cd PB	298	"cortex_r4_load_store_2")
bd4dc3cd PB	299