]>
Commit | Line | Data |
---|---|---|
4542a38a GY |
1 | ;; ARM ldrd/strd peephole optimizations. |
2 | ;; | |
a945c346 | 3 | ;; Copyright (C) 2013-2024 Free Software Foundation, Inc. |
4542a38a GY |
4 | ;; |
5 | ;; Written by Greta Yorsh <greta.yorsh@arm.com> | |
6 | ||
7 | ;; This file is part of GCC. | |
8 | ;; | |
9 | ;; GCC is free software; you can redistribute it and/or modify it | |
10 | ;; under the terms of the GNU General Public License as published by | |
11 | ;; the Free Software Foundation; either version 3, or (at your option) | |
12 | ;; any later version. | |
13 | ;; | |
14 | ;; GCC is distributed in the hope that it will be useful, but | |
15 | ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 | ;; General Public License for more details. | |
18 | ;; | |
19 | ;; You should have received a copy of the GNU General Public License | |
20 | ;; along with GCC; see the file COPYING3. If not see | |
21 | ;; <http://www.gnu.org/licenses/>. | |
22 | ||
23 | ;; The following peephole optimizations identify consecutive memory | |
24 | ;; accesses, and try to rearrange the operands to enable generation of | |
25 | ;; ldrd/strd. | |
c272bbda MM |
26 | ;; |
27 | ;; In many cases they behave in the same way that patterns in ldmstm.md behave, | |
28 | ;; but there is extra logic in gen_operands_ldrd_strd to try and ensure the | |
29 | ;; registers used are an (r<N>, r<N + 1>) pair where N is even. | |
4542a38a GY |
30 | |
;; ldrd: combine two adjacent SImode loads into general registers.
;;
;; Operands 0 and 1 are the destination registers; operands 2 and 3 are
;; the memory sources.  The replacement is one PARALLEL containing both
;; loads, and it is only emitted when gen_operands_ldrd_strd accepts the
;; operand set.  The three flag arguments passed here are
;; (true, false, false); their meaning is defined by that helper and is
;; not visible in this file.
(define_peephole2
  [(set (match_operand:SI 0 "arm_general_register_operand" "")
        (match_operand:SI 2 "memory_operand" ""))
   (set (match_operand:SI 1 "arm_general_register_operand" "")
        (match_operand:SI 3 "memory_operand" ""))]
  "TARGET_LDRD"
  [(parallel
     [(set (match_dup 0) (match_dup 2))
      (set (match_dup 1) (match_dup 3))])]
{
  /* Give up on this match when the helper rejects the two accesses.  */
  bool pair_ok = gen_operands_ldrd_strd (operands, true, false, false);
  if (!pair_ok)
    FAIL;
})
43 | ||
;; strd: combine two adjacent SImode stores from general registers.
;;
;; Operands 0 and 1 are the source registers; operands 2 and 3 are the
;; memory destinations.  The replacement is one PARALLEL containing both
;; stores, and it is only emitted when gen_operands_ldrd_strd accepts the
;; operand set.  The flag arguments passed here are (false, false, false);
;; their meaning is defined by that helper and is not visible in this file.
(define_peephole2
  [(set (match_operand:SI 2 "memory_operand" "")
        (match_operand:SI 0 "arm_general_register_operand" ""))
   (set (match_operand:SI 3 "memory_operand" "")
        (match_operand:SI 1 "arm_general_register_operand" ""))]
  "TARGET_LDRD"
  [(parallel
     [(set (match_dup 2) (match_dup 0))
      (set (match_dup 3) (match_dup 1))])]
{
  /* Give up on this match when the helper rejects the two accesses.  */
  bool pair_ok = gen_operands_ldrd_strd (operands, false, false, false);
  if (!pair_ok)
    FAIL;
})
56 | ||
57 | ;; The following peepholes reorder registers to enable LDRD/STRD. | |
;; strd of constants, interleaved form.
;;
;; Matches the four-insn sequence
;;     reg0 <- const4 ; mem2 <- reg0 ; reg1 <- const5 ; mem3 <- reg1
;; and rewrites it as both constant loads followed by one PARALLEL that
;; performs both stores.  gen_operands_ldrd_strd is called with the flags
;; (false, true, false) and decides whether the rewrite is possible.
(define_peephole2
  [(set (match_operand:SI 0 "arm_general_register_operand" "")
        (match_operand:SI 4 "const_int_operand" ""))
   (set (match_operand:SI 2 "memory_operand" "")
        (match_dup 0))
   (set (match_operand:SI 1 "arm_general_register_operand" "")
        (match_operand:SI 5 "const_int_operand" ""))
   (set (match_operand:SI 3 "memory_operand" "")
        (match_dup 1))]
  "TARGET_LDRD"
  [(set (match_dup 0) (match_dup 4))
   (set (match_dup 1) (match_dup 5))
   (parallel
     [(set (match_dup 2) (match_dup 0))
      (set (match_dup 3) (match_dup 1))])]
{
  /* Give up on this match when the helper rejects the operands.  */
  bool pair_ok = gen_operands_ldrd_strd (operands, false, true, false);
  if (!pair_ok)
    FAIL;
})
76 | ||
;; strd of constants, grouped form.
;;
;; Matches the four-insn sequence
;;     reg0 <- const4 ; reg1 <- const5 ; mem2 <- reg0 ; mem3 <- reg1
;; that is, both constants are set up before either store.  The output
;; keeps the two constant loads and replaces the two stores with one
;; PARALLEL.  gen_operands_ldrd_strd is called with the flags
;; (false, true, false) and decides whether the rewrite is possible.
(define_peephole2
  [(set (match_operand:SI 0 "arm_general_register_operand" "")
        (match_operand:SI 4 "const_int_operand" ""))
   (set (match_operand:SI 1 "arm_general_register_operand" "")
        (match_operand:SI 5 "const_int_operand" ""))
   (set (match_operand:SI 2 "memory_operand" "")
        (match_dup 0))
   (set (match_operand:SI 3 "memory_operand" "")
        (match_dup 1))]
  "TARGET_LDRD"
  [(set (match_dup 0) (match_dup 4))
   (set (match_dup 1) (match_dup 5))
   (parallel
     [(set (match_dup 2) (match_dup 0))
      (set (match_dup 3) (match_dup 1))])]
{
  /* Give up on this match when the helper rejects the operands.  */
  bool pair_ok = gen_operands_ldrd_strd (operands, false, true, false);
  if (!pair_ok)
    FAIL;
})
95 | ||
96 | ;; The following two peephole optimizations are only relevant for ARM | |
97 | ;; mode where LDRD/STRD require consecutive registers. | |
98 | ||
;; Swap the destination registers of two loads that feed a commutative
;; operation.
;;
;; Matched sequence: two SImode loads (operands 0 and 1 from memory
;; operands 2 and 3) followed by operand 4 <- operand 6 OP operand 7,
;; where OP satisfies commutative_binary_operator.
;;
;; The condition requires, in addition to TARGET_LDRD && TARGET_ARM:
;;   - the two loaded registers are exactly the two inputs of OP, in
;;     either order;
;;   - each loaded register either satisfies peep2_reg_dead_p at
;;     position 3 or is the result register itself.
;;
;; gen_operands_ldrd_strd is called with the flags (true, false, true)
;; and decides whether the rewrite is possible.
(define_peephole2
  [(set (match_operand:SI 0 "arm_general_register_operand" "")
        (match_operand:SI 2 "memory_operand" ""))
   (set (match_operand:SI 1 "arm_general_register_operand" "")
        (match_operand:SI 3 "memory_operand" ""))
   (set (match_operand:SI 4 "arm_general_register_operand" "")
        (match_operator:SI 5 "commutative_binary_operator"
          [(match_operand 6 "arm_general_register_operand" "")
           (match_operand 7 "arm_general_register_operand" "")]))]
  "TARGET_LDRD && TARGET_ARM
   && ((rtx_equal_p (operands[0], operands[6])
        && rtx_equal_p (operands[1], operands[7]))
       || (rtx_equal_p (operands[0], operands[7])
           && rtx_equal_p (operands[1], operands[6])))
   && (peep2_reg_dead_p (3, operands[0])
       || rtx_equal_p (operands[0], operands[4]))
   && (peep2_reg_dead_p (3, operands[1])
       || rtx_equal_p (operands[1], operands[4]))"
  [(parallel
     [(set (match_dup 0) (match_dup 2))
      (set (match_dup 1) (match_dup 3))])
   (set (match_dup 4)
        (match_op_dup 5 [(match_dup 6) (match_dup 7)]))]
{
  /* Give up on this match when the helper rejects the operands.  */
  bool pair_ok = gen_operands_ldrd_strd (operands, true, false, true);
  if (!pair_ok)
    FAIL;
})
4542a38a GY |
121 | |
;; Swap the destination registers of two loads that feed a commutative
;; operation which also clobbers the condition-code register.
;;
;; Matched sequence: two SImode loads (operands 0 and 1 from memory
;; operands 2 and 3) followed by a PARALLEL of
;;     operand 4 <- operand 6 OP operand 7
;; and a clobber of (reg:CC CC_REGNUM), where OP satisfies
;; commutative_binary_operator.
;;
;; The condition requires, in addition to TARGET_LDRD && TARGET_ARM:
;;   - the two loaded registers are exactly the two inputs of OP, in
;;     either order;
;;   - each loaded register either satisfies peep2_reg_dead_p at
;;     position 3 or is the result register itself.
;;
;; gen_operands_ldrd_strd is called with the flags (true, false, true)
;; and decides whether the rewrite is possible.
(define_peephole2
  [(set (match_operand:SI 0 "arm_general_register_operand" "")
        (match_operand:SI 2 "memory_operand" ""))
   (set (match_operand:SI 1 "arm_general_register_operand" "")
        (match_operand:SI 3 "memory_operand" ""))
   (parallel
     [(set (match_operand:SI 4 "arm_general_register_operand" "")
           (match_operator:SI 5 "commutative_binary_operator"
             [(match_operand 6 "arm_general_register_operand" "")
              (match_operand 7 "arm_general_register_operand" "")]))
      (clobber (reg:CC CC_REGNUM))])]
  "TARGET_LDRD && TARGET_ARM
   && ((rtx_equal_p (operands[0], operands[6])
        && rtx_equal_p (operands[1], operands[7]))
       || (rtx_equal_p (operands[0], operands[7])
           && rtx_equal_p (operands[1], operands[6])))
   && (peep2_reg_dead_p (3, operands[0])
       || rtx_equal_p (operands[0], operands[4]))
   && (peep2_reg_dead_p (3, operands[1])
       || rtx_equal_p (operands[1], operands[4]))"
  [(parallel
     [(set (match_dup 0) (match_dup 2))
      (set (match_dup 1) (match_dup 3))])
   (parallel
     [(set (match_dup 4)
           (match_op_dup 5 [(match_dup 6) (match_dup 7)]))
      (clobber (reg:CC CC_REGNUM))])]
{
  /* Give up on this match when the helper rejects the operands.  */
  bool pair_ok = gen_operands_ldrd_strd (operands, true, false, true);
  if (!pair_ok)
    FAIL;
})
4542a38a GY |
149 | |
150 | ;; TODO: Handle LDRD/STRD with writeback: | |
151 | ;; (a) memory operands can be POST_INC, POST_DEC, PRE_MODIFY, POST_MODIFY | |
152 | ;; (b) Patterns may be followed by an update of the base address. | |
c272bbda MM |
153 | |
154 | ||
155 | ;; insns matching the LDRD/STRD patterns that will get created by the above | |
156 | ;; peepholes. | |
157 | ;; Their conditions call valid_operands_ldrd_strd (), which is expected to | |
158 | ;; only check the operands and leave them unchanged. | |
;; *arm_ldrd: a PARALLEL of two SImode loads, output as one double-word
;; move.
;;
;; Operands 0 and 1 are the destination registers; operands 2 and 3 are
;; the memory sources.  The insn is only recognized in ARM state after
;; reload, and only when valid_operands_ldrd_strd accepts the operands.
(define_insn "*arm_ldrd"
  [(parallel
     [(set (match_operand:SI 0 "s_register_operand" "=r")
           (match_operand:SI 2 "memory_operand" "m"))
      (set (match_operand:SI 1 "s_register_operand" "=rk")
           (match_operand:SI 3 "memory_operand" "m"))])]
  "TARGET_LDRD && TARGET_ARM && reload_completed
   && valid_operands_ldrd_strd (operands, true)"
{
  /* Describe the access as one DImode move: the destination is a DImode
     register starting at the number of operand 0, the source is the
     first memory operand re-expressed in DImode at offset 0.  */
  rtx dword_reg = gen_rtx_REG (DImode, REGNO (operands[0]));
  rtx dword_mem = adjust_address (operands[2], DImode, 0);
  rtx xops[2] = { dword_reg, dword_mem };
  return output_move_double (xops, true, NULL);
}
  [(set (attr "length")
        (symbol_ref "arm_count_ldrdstrd_insns (operands, true) * 4"))
   (set (attr "ce_count") (symbol_ref "get_attr_length (insn) / 4"))
   (set_attr "type" "load_8")
   (set_attr "predicable" "yes")]
)
178 | ||
;; *arm_strd: a PARALLEL of two SImode stores, output as one double-word
;; move.
;;
;; Operands 0 and 1 are the source registers; operands 2 and 3 are the
;; memory destinations.  The insn is only recognized in ARM state after
;; reload, and only when valid_operands_ldrd_strd accepts the operands.
(define_insn "*arm_strd"
  [(parallel
     [(set (match_operand:SI 2 "memory_operand" "=m")
           (match_operand:SI 0 "s_register_operand" "r"))
      (set (match_operand:SI 3 "memory_operand" "=m")
           (match_operand:SI 1 "s_register_operand" "rk"))])]
  "TARGET_LDRD && TARGET_ARM && reload_completed
   && valid_operands_ldrd_strd (operands, false)"
{
  /* Describe the access as one DImode move: the destination is the
     first memory operand re-expressed in DImode at offset 0, the source
     is a DImode register starting at the number of operand 0.  */
  rtx dword_mem = adjust_address (operands[2], DImode, 0);
  rtx dword_reg = gen_rtx_REG (DImode, REGNO (operands[0]));
  rtx xops[2] = { dword_mem, dword_reg };
  return output_move_double (xops, true, NULL);
}
  [(set (attr "length")
        (symbol_ref "arm_count_ldrdstrd_insns (operands, false) * 4"))
   (set (attr "ce_count") (symbol_ref "get_attr_length (insn) / 4"))
   (set_attr "type" "store_8")
   (set_attr "predicable" "yes")]
)