/* Helper routines for memory move and comparison insns.
   Copyright (C) 2013-2015 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "function.h"
#include "basic-block.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "emit-rtl.h"
#include "explow.h"
#include "expr.h"
/* Like force_operand, but guarantees that VALUE ends up in TARGET.
   force_operand may return VALUE in some other register; if so, emit
   an explicit move so the caller can rely on TARGET holding it.  */
static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}
41 | ||
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.

   Returns true if code was emitted, false to fall back to the generic
   expander.  Only constant-size moves are handled here.  */
bool
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (CONST_INT_P (operands[2]));
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  if (! constp)
    return false;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      /* Copy one word at a time: movua.l handles the unaligned load,
	 the store side is known to be 32-bit aligned.  */
      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, 4);
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      /* Mop up the remaining (at most 3) tail bytes.  */
      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return true;
    }

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return false;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return false;
      else if (bytes == 12)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  /* Dedicated 12-byte helper; takes dst in r4 and src in r5.  */
	  function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx));
	  return true;
	}
      else if (! optimize_size)
	{
	  const char *entry_name;
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  int dwords;
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  /* Pick the odd/even entry point depending on whether a spare
	     word remains after copying double words.  */
	  entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
	  function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  /* r6 holds the double-word count minus one.  */
	  dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
	  return true;
	}
      else
	return false;
    }
  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      /* Size-specialized library entry, e.g. __movmemSI48.  */
      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return true;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! optimize_size)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return true;
    }

  return false;
}
183 | ||
45731f37 OE |
184 | static const int prob_unlikely = REG_BR_PROB_BASE / 10; |
185 | static const int prob_likely = REG_BR_PROB_BASE / 4; | |
3a1a7897 | 186 | |
/* Emit code to perform a strcmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the known alignment.

   Compares word-by-word while both addresses are word aligned, then
   falls back to a byte loop.  Always returns true (code is emitted).  */
bool
sh_expand_cmpstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);
  rtx tmp3 = gen_reg_rtx (SImode);

  rtx jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
  const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;

  /* If either address may be unaligned, test the low bits at runtime
     and take the byte loop when they are not word aligned.  */
  if (addr1_alignment < 4 && addr2_alignment < 4)
    {
      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }
  else if (addr1_alignment < 4 && addr2_alignment >= 4)
    {
      emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }
  else if (addr1_alignment >= 4 && addr2_alignment < 4)
    {
      emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

  /* tmp2 is aligned, OK to load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  /* start long loop.  */
  emit_label (L_loop_long);

  emit_move_insn (tmp2, tmp3);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));

  /* Is there a 0 byte ?  */
  emit_insn (gen_andsi3 (tmp3, tmp3, tmp1));

  emit_insn (gen_cmpstr_t (tmp0, tmp3));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));

  /* tmp2 is aligned, OK to load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  jump = emit_jump_insn (gen_branch_true (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  /* Fall through: the words differ, subtract them.  On little endian
     byte-swap both words first so that the byte-wise comparison order
     matches strcmp semantics.  */
  if (TARGET_LITTLE_ENDIAN)
    {
      rtx low_1 = gen_lowpart (HImode, tmp1);
      rtx low_2 = gen_lowpart (HImode, tmp2);

      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
      emit_insn (gen_rotlsi3_16 (tmp1, tmp1));
      emit_insn (gen_rotlsi3_16 (tmp2, tmp2));
      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
    }

  jump = emit_jump_insn (gen_jump_compact (L_return));
  emit_barrier_after (jump);

  emit_label (L_end_loop_long);

  /* Found a zero byte in the last word; back up and redo it bytewise.  */
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));

  /* start byte loop.  */
  addr1 = adjust_address (addr1, QImode, 0);
  addr2 = adjust_address (addr2, QImode, 0);

  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  /* With delayed branches the zero-extension can sit in the delay slot.  */
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_true (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  emit_label (L_end_loop_byte);

  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_label (L_return);

  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  return true;
}
325 | ||
/* Emit code to perform a strncmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the length.
   OPERANDS[4] is the known alignment.

   For a constant length a word loop plus unrolled tail is emitted;
   otherwise only a counted byte loop.  Always returns true.  */
bool
sh_expand_cmpnstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);

  rtx jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();

  rtx len = force_reg (SImode, operands[3]);
  int constp = CONST_INT_P (operands[3]);

  const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
  const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;

  /* Loop on a register count.  */
  if (constp)
    {
      rtx tmp0 = gen_reg_rtx (SImode);
      rtx tmp3 = gen_reg_rtx (SImode);
      rtx lenw = gen_reg_rtx (SImode);

      rtx_code_label *L_loop_long = gen_label_rtx ();
      rtx_code_label *L_end_loop_long = gen_label_rtx ();

      int bytes = INTVAL (operands[3]);
      int witers = bytes / 4;

      if (witers > 1)
	{
	  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
	  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

	  emit_move_insn (tmp0, const0_rtx);

	  /* Runtime alignment check; unaligned addresses take the
	     byte loop instead.  */
	  if (addr1_alignment < 4 && addr2_alignment < 4)
	    {
	      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
	      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }
	  else if (addr1_alignment < 4 && addr2_alignment >= 4)
	    {
	      emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }
	  else if (addr1_alignment >= 4 && addr2_alignment < 4)
	    {
	      emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }

	  /* word count.  Do we have iterations ?  */
	  emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

	  /* start long loop.  */
	  emit_label (L_loop_long);

	  /* tmp2 is aligned, OK to load.  */
	  emit_move_insn (tmp2, addr2);
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  GET_MODE_SIZE (SImode)));

	  /* tmp1 is aligned, OK to load.  */
	  emit_move_insn (tmp1, addr1);
	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  GET_MODE_SIZE (SImode)));

	  /* Is there a 0 byte ?  */
	  emit_insn (gen_andsi3 (tmp3, tmp2, tmp1));

	  emit_insn (gen_cmpstr_t (tmp0, tmp3));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  /* Decrement the word counter; dect sets T when it hits zero.  */
	  if (TARGET_SH2)
	    emit_insn (gen_dect (lenw, lenw));
	  else
	    {
	      emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
	      emit_insn (gen_tstsi_t (lenw, lenw));
	    }

	  jump = emit_jump_insn (gen_branch_false (L_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

	  int sbytes = bytes % 4;

	  /* end loop.  Reached max iterations.  */
	  if (sbytes == 0)
	    {
	      emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
	      jump = emit_jump_insn (gen_jump_compact (L_return));
	      emit_barrier_after (jump);
	    }
	  else
	    {
	      /* Remaining bytes to check.  */

	      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
	      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

	      while (sbytes--)
		{
		  emit_insn (gen_extendqisi2 (tmp1, addr1));
		  emit_insn (gen_extendqisi2 (tmp2, addr2));

		  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
		  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
		  if (flag_delayed_branch)
		    emit_insn (gen_zero_extendqisi2 (tmp2,
						     gen_lowpart (QImode,
								  tmp2)));
		  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  addr1 = adjust_address (addr1, QImode,
					  GET_MODE_SIZE (QImode));
		  addr2 = adjust_address (addr2, QImode,
					  GET_MODE_SIZE (QImode));
		}

	      jump = emit_jump_insn (gen_jump_compact (L_end_loop_byte));
	      emit_barrier_after (jump);
	    }

	  emit_label (L_end_loop_long);

	  /* Found last word.  Restart it byte per byte.  */

	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  -GET_MODE_SIZE (SImode)));
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  -GET_MODE_SIZE (SImode)));

	  /* fall thru.  */
	}

      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

      /* Unrolled byte comparison of the (small, constant) remainder.  */
      while (bytes--)
	{
	  emit_insn (gen_extendqisi2 (tmp1, addr1));
	  emit_insn (gen_extendqisi2 (tmp2, addr2));

	  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  if (flag_delayed_branch)
	    emit_insn (gen_zero_extendqisi2 (tmp2,
					     gen_lowpart (QImode, tmp2)));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode));
	  addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode));
	}

      jump = emit_jump_insn (gen_jump_compact (L_end_loop_byte));
      emit_barrier_after (jump);
    }
  else
    {
      /* Variable length: result is 0 if the length is 0.  */
      emit_insn (gen_cmpeqsi_t (len, const0_rtx));
      emit_move_insn (operands[0], const0_rtx);
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    }

  addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  /* Decrement the byte counter; dect sets T when it hits zero.  */
  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end byte loop.  */

  emit_label (L_end_loop_byte);

  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  emit_label (L_return);

  return true;
}
566 | ||
/* Emit code to perform a strlen.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the string.
   OPERANDS[2] is the char to search.
   OPERANDS[3] is the alignment.

   Scans a word at a time with cmp/str once the address is word
   aligned, then locates the zero byte within the final word by an
   unrolled 4-byte check.  Always returns true.  */
bool
sh_expand_strlen (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();

  rtx jump;
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  int align = INTVAL (operands[3]);

  emit_move_insn (operands[0], GEN_INT (-1));

  /* remember start of string.  */
  emit_move_insn (start_addr, current_addr);

  /* If the address may be unaligned, check it at runtime and take the
     byte loop when it is not word aligned.  */
  if (align < 4)
    {
      emit_insn (gen_tstsi_t (current_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  emit_move_insn (tmp0, operands[2]);

  addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0);

  /* start long loop.  */
  emit_label (L_loop_long);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4));

  /* Is there a 0 byte ?  */
  emit_insn (gen_cmpstr_t (tmp0, tmp1));

  jump = emit_jump_insn (gen_branch_false (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  emit_label (L_end_loop_long);

  /* Found the word containing the zero byte; back up and find it.  */
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4));

  addr1 = adjust_address (addr1, QImode, 0);

  /* unroll remaining bytes.  */
  for (int i = 0; i < 4; ++i)
    {
      emit_insn (gen_extendqisi2 (tmp1, addr1));
      emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
      emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  emit_barrier_after (jump);

  /* start byte loop.  */
  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  /* end loop.  */

  emit_label (L_return);

  /* current_addr points one past the NUL; bump start_addr by one so the
     subtraction yields the length without the terminator.  */
  emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1)));
  emit_insn (gen_subsi3 (operands[0], current_addr, start_addr));

  return true;
}
fa1aecc1 | 657 | |
/* Emit code to perform a memset.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the size;
   OPERANDS[2] is the char to search.
   OPERANDS[3] is the alignment.

   Only expands when the size is a compile-time constant; otherwise
   returns without emitting the fill loop and the caller falls back.
   For a 0/-1 fill value and more than 8 bytes a word loop is used.  */
void
sh_expand_setmem (rtx *operands)
{
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_word = gen_label_rtx ();
  rtx_code_label *L_return = gen_label_rtx ();
  rtx jump;
  rtx dest = copy_rtx (operands[0]);
  rtx dest_addr = copy_addr_to_reg (XEXP (dest, 0));
  rtx val = force_reg (SImode, operands[2]);
  int align = INTVAL (operands[3]);
  rtx len = force_reg (SImode, operands[1]);

  if (! CONST_INT_P (operands[1]))
    return;

  int count = INTVAL (operands[1]);

  /* A 0 or -1 fill value has identical bytes in every position, so a
     word store fills four bytes at once without any byte replication.  */
  if (CONST_INT_P (operands[2])
      && (INTVAL (operands[2]) == 0 || INTVAL (operands[2]) == -1) && count > 8)
    {
      rtx lenw = gen_reg_rtx (SImode);

      if (align < 4)
	{
	  emit_insn (gen_tstsi_t (dest_addr, GEN_INT (3)));
	  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	}

      /* word count.  Do we have iterations ?  */
      emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

      dest = adjust_automodify_address (dest, SImode, dest_addr, 0);

      /* start loop.  */
      emit_label (L_loop_word);

      /* Decrement first; the T bit set here survives the stores below
	 and is consumed by the branch at the bottom of the loop.  */
      if (TARGET_SH2)
	emit_insn (gen_dect (lenw, lenw));
      else
	{
	  emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
	  emit_insn (gen_tstsi_t (lenw, lenw));
	}

      emit_move_insn (dest, val);
      emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
						GET_MODE_SIZE (SImode)));


      jump = emit_jump_insn (gen_branch_false (L_loop_word));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
      count = count % 4;

      dest = adjust_address (dest, QImode, 0);

      val = gen_lowpart (QImode, val);

      /* Unrolled stores for the (at most 3) tail bytes.  */
      while (count--)
	{
	  emit_move_insn (dest, val);
	  emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
						    GET_MODE_SIZE (QImode)));
	}

      jump = emit_jump_insn (gen_jump_compact (L_return));
      emit_barrier_after (jump);
    }

  dest = adjust_automodify_address (dest, QImode, dest_addr, 0);

  /* start loop.  */
  emit_label (L_loop_byte);

  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  val = gen_lowpart (QImode, val);
  emit_move_insn (dest, val);
  emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
					    GET_MODE_SIZE (QImode)));

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  emit_label (L_return);
}