]>
Commit | Line | Data |
---|---|---|
8e701300 | 1 | /* Helper routines for memory move and comparison insns. |
5624e564 | 2 | Copyright (C) 2013-2015 Free Software Foundation, Inc. |
8e701300 CB |
3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify | |
7 | it under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 3, or (at your option) | |
9 | any later version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING3. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #include "config.h" | |
21 | #include "system.h" | |
22 | #include "coretypes.h" | |
23 | #include "tm.h" | |
8e701300 | 24 | #include "rtl.h" |
40e23961 | 25 | #include "alias.h" |
8e701300 | 26 | #include "tree.h" |
36566b39 PK |
27 | #include "function.h" |
28 | #include "flags.h" | |
36566b39 PK |
29 | #include "insn-config.h" |
30 | #include "expmed.h" | |
31 | #include "dojump.h" | |
32 | #include "explow.h" | |
33 | #include "calls.h" | |
34 | #include "emit-rtl.h" | |
35 | #include "varasm.h" | |
36 | #include "stmt.h" | |
8e701300 CB |
37 | #include "expr.h" |
38 | #include "tm_p.h" | |
60393bbc | 39 | #include "predict.h" |
60393bbc AM |
40 | #include "cfg.h" |
41 | #include "cfgrtl.h" | |
42 | #include "cfganal.h" | |
43 | #include "lcm.h" | |
44 | #include "cfgbuild.h" | |
45 | #include "cfgcleanup.h" | |
8e701300 CB |
46 | #include "basic-block.h" |
47 | ||
48 | /* Like force_operand, but guarantees that VALUE ends up in TARGET: if the rtx returned by force_operand is not rtx_equal_p to TARGET, an explicit move into TARGET is emitted. */ | |
49 | static void | |
50 | force_into (rtx value, rtx target) | |
51 | { | |
52 | value = force_operand (value, target); | |
53 | if (! rtx_equal_p (value, target)) | |
54 | emit_insn (gen_move_insn (target, value)); | |
55 | } | |
56 | ||
57 | /* Emit code to perform a block move. Choose the best method. | |
58 | ||
59 | OPERANDS[0] is the destination. | |
60 | OPERANDS[1] is the source. | |
61 | OPERANDS[2] is the size. | |
62 | OPERANDS[3] is the alignment safe to use. Return true if a move sequence was emitted, false if none of the methods below applies (e.g. the size is not a CONST_INT). */ | |
63 | bool | |
64 | expand_block_move (rtx *operands) | |
65 | { | |
66 | int align = INTVAL (operands[3]); | |
67 | int constp = (CONST_INT_P (operands[2])); | |
68 | int bytes = (constp ? INTVAL (operands[2]) : 0); | |
69 | ||
70 | if (! constp) | |
71 | return false; | |
72 | ||
73 | /* If we could use mov.l to move words and dest is word-aligned, we | |
74 | can use movua.l for loads and still generate a relatively short | |
75 | and efficient sequence. */ | |
f3ca7111 | 76 | if (TARGET_SH4A && align < 4 |
8e701300 CB |
77 | && MEM_ALIGN (operands[0]) >= 32 |
78 | && can_move_by_pieces (bytes, 32)) | |
79 | { | |
80 | rtx dest = copy_rtx (operands[0]); | |
81 | rtx src = copy_rtx (operands[1]); | |
82 | /* We could use different pseudos for each copied word, but | |
83 | since movua can only load into r0, it's kind of | |
84 | pointless. */ | |
85 | rtx temp = gen_reg_rtx (SImode); | |
86 | rtx src_addr = copy_addr_to_reg (XEXP (src, 0)); | |
87 | int copied = 0; | |
88 | ||
89 | while (copied + 4 <= bytes) | |
90 | { | |
91 | rtx to = adjust_address (dest, SImode, copied); | |
92 | rtx from = adjust_automodify_address (src, BLKmode, | |
93 | src_addr, copied); | |
94 | ||
95 | set_mem_size (from, 4); | |
96 | emit_insn (gen_movua (temp, from)); | |
97 | emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4)); | |
98 | emit_move_insn (to, temp); | |
99 | copied += 4; | |
100 | } | |
101 | ||
102 | if (copied < bytes) | |
103 | move_by_pieces (adjust_address (dest, BLKmode, copied), | |
104 | adjust_automodify_address (src, BLKmode, | |
105 | src_addr, copied), | |
106 | bytes - copied, align, 0); | |
107 | ||
108 | return true; | |
109 | } | |
110 | ||
111 | /* Non-constant sizes were already rejected above. If it doesn't have | |
112 | 4 byte alignment, or if it isn't a multiple of 4 bytes, then fail. */ | |
113 | if (align < 4 || (bytes % 4 != 0)) | |
114 | return false; | |
115 | ||
116 | if (TARGET_HARD_SH4) | |
117 | { | |
118 | if (bytes < 12) | |
119 | return false; | |
120 | else if (bytes == 12) | |
121 | { | |
122 | rtx func_addr_rtx = gen_reg_rtx (Pmode); | |
123 | rtx r4 = gen_rtx_REG (SImode, 4); | |
124 | rtx r5 = gen_rtx_REG (SImode, 5); | |
125 | ||
126 | function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC); | |
127 | force_into (XEXP (operands[0], 0), r4); | |
128 | force_into (XEXP (operands[1], 0), r5); | |
129 | emit_insn (gen_block_move_real_i4 (func_addr_rtx)); | |
130 | return true; | |
131 | } | |
132 | else if (! optimize_size) | |
133 | { | |
134 | const char *entry_name; | |
135 | rtx func_addr_rtx = gen_reg_rtx (Pmode); | |
136 | int dwords; | |
137 | rtx r4 = gen_rtx_REG (SImode, 4); | |
138 | rtx r5 = gen_rtx_REG (SImode, 5); | |
139 | rtx r6 = gen_rtx_REG (SImode, 6); | |
140 | ||
141 | entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even"); | |
142 | function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC); | |
143 | force_into (XEXP (operands[0], 0), r4); | |
144 | force_into (XEXP (operands[1], 0), r5); | |
145 | ||
146 | dwords = bytes >> 3; | |
147 | emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1))); /* r6 = number of whole 8-byte units, minus one. */ | |
148 | emit_insn (gen_block_lump_real_i4 (func_addr_rtx)); | |
149 | return true; | |
150 | } | |
151 | else | |
152 | return false; | |
153 | } | |
154 | if (bytes < 64) | |
155 | { | |
156 | char entry[30]; | |
157 | rtx func_addr_rtx = gen_reg_rtx (Pmode); | |
158 | rtx r4 = gen_rtx_REG (SImode, 4); | |
159 | rtx r5 = gen_rtx_REG (SImode, 5); | |
160 | ||
161 | sprintf (entry, "__movmemSI%d", bytes); /* bytes is a multiple of 4 below 64 here. */ | |
162 | function_symbol (func_addr_rtx, entry, SFUNC_STATIC); | |
163 | force_into (XEXP (operands[0], 0), r4); | |
164 | force_into (XEXP (operands[1], 0), r5); | |
165 | emit_insn (gen_block_move_real (func_addr_rtx)); | |
166 | return true; | |
167 | } | |
168 | ||
169 | /* This is the same number of bytes as a memcpy call, but to a different | |
170 | less common function name, so this will occasionally use more space. */ | |
171 | if (! optimize_size) | |
172 | { | |
173 | rtx func_addr_rtx = gen_reg_rtx (Pmode); | |
174 | int final_switch, while_loop; | |
175 | rtx r4 = gen_rtx_REG (SImode, 4); | |
176 | rtx r5 = gen_rtx_REG (SImode, 5); | |
177 | rtx r6 = gen_rtx_REG (SImode, 6); | |
178 | ||
179 | function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC); | |
180 | force_into (XEXP (operands[0], 0), r4); | |
181 | force_into (XEXP (operands[1], 0), r5); | |
182 | ||
183 | /* r6 controls the size of the move. 16 is decremented from it | |
184 | for each 64 bytes moved. Then the negative bit left over is used | |
185 | as an index into a list of move instructions. e.g., a 72 byte move | |
186 | would be set up with size(r6) = 14, for one iteration through the | |
187 | big while loop, and a switch of -2 for the last part. */ | |
188 | ||
189 | final_switch = 16 - ((bytes / 4) % 16); | |
190 | while_loop = ((bytes / 4) / 16 - 1) * 16; | |
191 | emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch))); | |
192 | emit_insn (gen_block_lump_real (func_addr_rtx)); | |
193 | return true; | |
194 | } | |
195 | ||
196 | return false; | |
197 | } | |
198 | ||
45731f37 OE |
199 | static const int prob_unlikely = REG_BR_PROB_BASE / 10; /* One tenth of REG_BR_PROB_BASE; used below as a REG_BR_PROB note value. */ |
200 | static const int prob_likely = REG_BR_PROB_BASE / 4; /* One quarter of REG_BR_PROB_BASE; used below as a REG_BR_PROB note value. */ | |
3a1a7897 | 201 | |
8e701300 CB |
202 | /* Emit code to perform a strcmp. |
203 | ||
204 | OPERANDS[0] is the destination. | |
205 | OPERANDS[1] is the first string. | |
206 | OPERANDS[2] is the second string. | |
3a1a7897 | 207 | OPERANDS[3] is the known alignment. Always returns true. */ |
8e701300 CB |
208 | bool |
209 | sh_expand_cmpstr (rtx *operands) | |
210 | { | |
3a1a7897 CB |
211 | rtx addr1 = operands[1]; |
212 | rtx addr2 = operands[2]; | |
213 | rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0)); | |
214 | rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0)); | |
8e701300 CB |
215 | rtx tmp0 = gen_reg_rtx (SImode); |
216 | rtx tmp1 = gen_reg_rtx (SImode); | |
217 | rtx tmp2 = gen_reg_rtx (SImode); | |
218 | rtx tmp3 = gen_reg_rtx (SImode); | |
219 | ||
3a1a7897 | 220 | rtx jump; |
19f8b229 TS |
221 | rtx_code_label *L_return = gen_label_rtx (); |
222 | rtx_code_label *L_loop_byte = gen_label_rtx (); | |
223 | rtx_code_label *L_end_loop_byte = gen_label_rtx (); | |
224 | rtx_code_label *L_loop_long = gen_label_rtx (); | |
225 | rtx_code_label *L_end_loop_long = gen_label_rtx (); | |
8e701300 | 226 | |
ca494b8d | 227 | int align = INTVAL (operands[3]); |
8e701300 CB |
228 | |
229 | emit_move_insn (tmp0, const0_rtx); | |
230 | ||
ca494b8d CB |
231 | if (align < 4) |
232 | { | |
233 | emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr)); /* OR the two addresses so a single test of the low two bits covers both. */ | |
3b140613 | 234 | emit_insn (gen_tstsi_t (tmp1, GEN_INT (3))); |
ca494b8d CB |
235 | jump = emit_jump_insn (gen_branch_false (L_loop_byte)); |
236 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); | |
237 | } | |
8e701300 | 238 | |
3a1a7897 CB |
239 | addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0); |
240 | addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0); | |
8e701300 CB |
241 | |
242 | /* s2 is aligned, OK to load a word into tmp3. */ | |
243 | emit_move_insn (tmp3, addr2); | |
244 | emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4)); | |
245 | ||
45731f37 | 246 | /* start long loop. */ |
8e701300 CB |
247 | emit_label (L_loop_long); |
248 | ||
249 | emit_move_insn (tmp2, tmp3); | |
250 | ||
251 | /* s1 is aligned, OK to load a word into tmp1. */ | |
252 | emit_move_insn (tmp1, addr1); | |
253 | emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4)); | |
254 | ||
255 | /* Is there a 0 byte ? */ | |
256 | emit_insn (gen_andsi3 (tmp3, tmp3, tmp1)); | |
257 | ||
258 | emit_insn (gen_cmpstr_t (tmp0, tmp3)); | |
259 | jump = emit_jump_insn (gen_branch_true (L_end_loop_long)); | |
260 | add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); | |
261 | ||
262 | emit_insn (gen_cmpeqsi_t (tmp1, tmp2)); | |
263 | ||
264 | /* s2 is aligned, OK to load the next word into tmp3. */ | |
265 | emit_move_insn (tmp3, addr2); | |
266 | emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4)); | |
267 | ||
268 | jump = emit_jump_insn (gen_branch_true (L_loop_long)); | |
269 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); | |
270 | /* end loop. */ | |
271 | ||
ca494b8d | 272 | /* Fall through, subtract words. */ |
8e701300 CB |
273 | if (TARGET_LITTLE_ENDIAN) |
274 | { | |
275 | rtx low_1 = gen_lowpart (HImode, tmp1); | |
276 | rtx low_2 = gen_lowpart (HImode, tmp2); | |
277 | ||
278 | emit_insn (gen_rotlhi3_8 (low_1, low_1)); | |
279 | emit_insn (gen_rotlhi3_8 (low_2, low_2)); | |
280 | emit_insn (gen_rotlsi3_16 (tmp1, tmp1)); | |
281 | emit_insn (gen_rotlsi3_16 (tmp2, tmp2)); | |
282 | emit_insn (gen_rotlhi3_8 (low_1, low_1)); | |
283 | emit_insn (gen_rotlhi3_8 (low_2, low_2)); | |
284 | } | |
285 | ||
286 | jump = emit_jump_insn (gen_jump_compact (L_return)); | |
287 | emit_barrier_after (jump); | |
288 | ||
8e701300 CB |
289 | emit_label (L_end_loop_long); |
290 | ||
291 | emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4)); | |
292 | emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4)); | |
293 | ||
ca494b8d | 294 | /* start byte loop. */ |
3a1a7897 CB |
295 | addr1 = adjust_address (addr1, QImode, 0); |
296 | addr2 = adjust_address (addr2, QImode, 0); | |
ca494b8d | 297 | |
8e701300 CB |
298 | emit_label (L_loop_byte); |
299 | ||
300 | emit_insn (gen_extendqisi2 (tmp2, addr2)); | |
301 | emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1)); | |
302 | ||
303 | emit_insn (gen_extendqisi2 (tmp1, addr1)); | |
304 | emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1)); | |
305 | ||
306 | emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx)); | |
307 | jump = emit_jump_insn (gen_branch_true (L_end_loop_byte)); | |
308 | add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); | |
309 | ||
310 | emit_insn (gen_cmpeqsi_t (tmp1, tmp2)); | |
ca494b8d CB |
311 | if (flag_delayed_branch) |
312 | emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2))); | |
313 | jump = emit_jump_insn (gen_branch_true (L_loop_byte)); | |
8e701300 CB |
314 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); |
315 | /* end loop. */ | |
316 | ||
317 | emit_label (L_end_loop_byte); | |
318 | ||
ca494b8d CB |
319 | if (! flag_delayed_branch) |
320 | emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2))); | |
8e701300 CB |
321 | emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1))); |
322 | ||
323 | emit_label (L_return); | |
324 | ||
325 | emit_insn (gen_subsi3 (operands[0], tmp1, tmp2)); | |
326 | ||
327 | return true; | |
328 | } | |
329 | ||
ca494b8d CB |
330 | /* Emit code to perform a strncmp. |
331 | ||
332 | OPERANDS[0] is the destination. | |
333 | OPERANDS[1] is the first string. | |
334 | OPERANDS[2] is the second string. | |
335 | OPERANDS[3] is the length. | |
3a1a7897 | 336 | OPERANDS[4] is the known alignment. Always returns true. */ |
ca494b8d CB |
337 | bool |
338 | sh_expand_cmpnstr (rtx *operands) | |
339 | { | |
3a1a7897 CB |
340 | rtx addr1 = operands[1]; |
341 | rtx addr2 = operands[2]; | |
342 | rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0)); | |
343 | rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0)); | |
ca494b8d CB |
344 | rtx tmp1 = gen_reg_rtx (SImode); |
345 | rtx tmp2 = gen_reg_rtx (SImode); | |
346 | ||
3a1a7897 | 347 | rtx jump; |
19f8b229 TS |
348 | rtx_code_label *L_return = gen_label_rtx (); |
349 | rtx_code_label *L_loop_byte = gen_label_rtx (); | |
350 | rtx_code_label *L_end_loop_byte = gen_label_rtx (); | |
ca494b8d | 351 | |
ca494b8d | 352 | rtx len = force_reg (SImode, operands[3]); |
eae298d6 | 353 | int constp = CONST_INT_P (operands[3]); |
ca494b8d | 354 | |
45731f37 | 355 | /* Constant length: the word loop and the byte compares below are sized at compile time. */ |
eae298d6 | 356 | if (constp) |
ca494b8d | 357 | { |
eae298d6 | 358 | rtx tmp0 = gen_reg_rtx (SImode); |
ca494b8d CB |
359 | rtx tmp3 = gen_reg_rtx (SImode); |
360 | rtx lenw = gen_reg_rtx (SImode); | |
ca494b8d | 361 | |
19f8b229 TS |
362 | rtx_code_label *L_loop_long = gen_label_rtx (); |
363 | rtx_code_label *L_end_loop_long = gen_label_rtx (); | |
ca494b8d | 364 | |
eae298d6 CB |
365 | int align = INTVAL (operands[4]); |
366 | int bytes = INTVAL (operands[3]); | |
367 | int witers = bytes / 4; | |
368 | ||
369 | if (witers > 1) | |
45731f37 OE |
370 | { |
371 | addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0); | |
372 | addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0); | |
373 | ||
374 | emit_move_insn (tmp0, const0_rtx); | |
375 | ||
376 | if (align < 4) | |
377 | { | |
378 | emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr)); | |
3b140613 | 379 | emit_insn (gen_tstsi_t (tmp1, GEN_INT (3))); |
45731f37 OE |
380 | jump = emit_jump_insn (gen_branch_false (L_loop_byte)); |
381 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); | |
382 | } | |
383 | ||
384 | /* word count. Do we have iterations ? */ | |
385 | emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2))); | |
386 | ||
387 | /* start long loop. */ | |
388 | emit_label (L_loop_long); | |
389 | ||
390 | /* s2 is aligned, OK to load a word into tmp2. */ | |
391 | emit_move_insn (tmp2, addr2); | |
392 | emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, | |
393 | GET_MODE_SIZE (SImode))); | |
394 | ||
395 | /* s1 is aligned, OK to load a word into tmp1. */ | |
396 | emit_move_insn (tmp1, addr1); | |
397 | emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, | |
398 | GET_MODE_SIZE (SImode))); | |
399 | ||
400 | /* Is there a 0 byte ? */ | |
401 | emit_insn (gen_andsi3 (tmp3, tmp2, tmp1)); | |
402 | ||
403 | emit_insn (gen_cmpstr_t (tmp0, tmp3)); | |
404 | jump = emit_jump_insn (gen_branch_true (L_end_loop_long)); | |
405 | add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); | |
406 | ||
407 | emit_insn (gen_cmpeqsi_t (tmp1, tmp2)); | |
408 | jump = emit_jump_insn (gen_branch_false (L_end_loop_long)); | |
409 | add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); | |
410 | ||
411 | if (TARGET_SH2) | |
412 | emit_insn (gen_dect (lenw, lenw)); | |
413 | else | |
414 | { | |
415 | emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1))); | |
416 | emit_insn (gen_tstsi_t (lenw, lenw)); | |
417 | } | |
418 | ||
419 | jump = emit_jump_insn (gen_branch_false (L_loop_long)); | |
420 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); | |
421 | ||
422 | int sbytes = bytes % 4; | |
423 | ||
424 | /* end loop. Reached max iterations. */ | |
425 | if (sbytes == 0) | |
426 | { | |
6a6b03ba | 427 | emit_insn (gen_subsi3 (operands[0], tmp1, tmp2)); |
45731f37 OE |
428 | jump = emit_jump_insn (gen_jump_compact (L_return)); |
429 | emit_barrier_after (jump); | |
430 | } | |
431 | else | |
432 | { | |
433 | /* Remaining bytes to check. */ | |
434 | ||
435 | addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0); | |
436 | addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0); | |
437 | ||
438 | while (sbytes--) | |
439 | { | |
440 | emit_insn (gen_extendqisi2 (tmp1, addr1)); | |
441 | emit_insn (gen_extendqisi2 (tmp2, addr2)); | |
442 | ||
443 | emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx)); | |
444 | jump = emit_jump_insn (gen_branch_true (L_end_loop_byte)); | |
445 | add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); | |
446 | ||
447 | emit_insn (gen_cmpeqsi_t (tmp1, tmp2)); | |
448 | if (flag_delayed_branch) | |
449 | emit_insn (gen_zero_extendqisi2 (tmp2, | |
450 | gen_lowpart (QImode, | |
451 | tmp2))); | |
452 | jump = emit_jump_insn (gen_branch_false (L_end_loop_byte)); | |
453 | add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); | |
454 | ||
455 | addr1 = adjust_address (addr1, QImode, | |
456 | GET_MODE_SIZE (QImode)); | |
457 | addr2 = adjust_address (addr2, QImode, | |
458 | GET_MODE_SIZE (QImode)); | |
459 | } | |
460 | ||
461 | jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte)); | |
462 | emit_barrier_after (jump); | |
463 | } | |
464 | ||
465 | emit_label (L_end_loop_long); | |
466 | ||
467 | /* Found last word. Restart it byte per byte. */ | |
468 | ||
469 | emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, | |
470 | -GET_MODE_SIZE (SImode))); | |
471 | emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, | |
472 | -GET_MODE_SIZE (SImode))); | |
473 | ||
474 | /* fall thru. */ | |
475 | } | |
eae298d6 CB |
476 | |
477 | addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0); | |
478 | addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0); | |
479 | ||
480 | while (bytes--) | |
45731f37 OE |
481 | { |
482 | emit_insn (gen_extendqisi2 (tmp1, addr1)); | |
483 | emit_insn (gen_extendqisi2 (tmp2, addr2)); | |
484 | ||
485 | emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx)); | |
486 | jump = emit_jump_insn (gen_branch_true (L_end_loop_byte)); | |
487 | add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); | |
488 | ||
489 | emit_insn (gen_cmpeqsi_t (tmp1, tmp2)); | |
490 | if (flag_delayed_branch) | |
491 | emit_insn (gen_zero_extendqisi2 (tmp2, | |
492 | gen_lowpart (QImode, tmp2))); | |
493 | jump = emit_jump_insn (gen_branch_false (L_end_loop_byte)); | |
494 | add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); | |
495 | ||
496 | addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode)); | |
497 | addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode)); | |
498 | } | |
eae298d6 CB |
499 | |
500 | jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte)); | |
501 | emit_barrier_after (jump); | |
ca494b8d | 502 | } |
6a6b03ba CB |
503 | else |
504 | { | |
505 | emit_insn (gen_cmpeqsi_t (len, const0_rtx)); /* Variable length: compare LEN with 0, preset the result to 0, and conditionally skip to L_return. */ | |
506 | emit_move_insn (operands[0], const0_rtx); | |
507 | jump = emit_jump_insn (gen_branch_true (L_return)); | |
508 | add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); | |
509 | } | |
ca494b8d | 510 | |
eae298d6 CB |
511 | addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0); |
512 | addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0); | |
3a1a7897 CB |
513 | |
514 | emit_label (L_loop_byte); | |
515 | ||
516 | emit_insn (gen_extendqisi2 (tmp2, addr2)); | |
517 | emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1)); | |
518 | ||
519 | emit_insn (gen_extendqisi2 (tmp1, addr1)); | |
520 | emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1)); | |
521 | ||
522 | emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx)); | |
523 | jump = emit_jump_insn (gen_branch_true (L_end_loop_byte)); | |
524 | add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); | |
525 | ||
526 | emit_insn (gen_cmpeqsi_t (tmp1, tmp2)); | |
527 | if (flag_delayed_branch) | |
528 | emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2))); | |
529 | jump = emit_jump_insn (gen_branch_false (L_end_loop_byte)); | |
530 | add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); | |
531 | ||
532 | if (TARGET_SH2) | |
533 | emit_insn (gen_dect (len, len)); | |
534 | else | |
535 | { | |
536 | emit_insn (gen_addsi3 (len, len, GEN_INT (-1))); | |
537 | emit_insn (gen_tstsi_t (len, len)); | |
538 | } | |
539 | ||
540 | jump = emit_jump_insn (gen_branch_false (L_loop_byte)); | |
541 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); | |
542 | /* end byte loop. */ | |
543 | ||
544 | emit_label (L_end_loop_byte); | |
545 | ||
546 | if (! flag_delayed_branch) | |
547 | emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2))); | |
548 | emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1))); | |
549 | ||
3a1a7897 CB |
550 | emit_insn (gen_subsi3 (operands[0], tmp1, tmp2)); |
551 | ||
6a6b03ba CB |
552 | emit_label (L_return); |
553 | ||
3a1a7897 CB |
554 | return true; |
555 | } | |
556 | ||
45731f37 | 557 | /* Emit code to perform a strlen. |
3a1a7897 CB |
558 | |
559 | OPERANDS[0] is the destination. | |
560 | OPERANDS[1] is the string. | |
561 | OPERANDS[2] is the char to search. | |
562 | OPERANDS[3] is the alignment. Always returns true. */ | |
563 | bool | |
564 | sh_expand_strlen (rtx *operands) | |
565 | { | |
566 | rtx addr1 = operands[1]; | |
567 | rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0)); | |
568 | rtx start_addr = gen_reg_rtx (Pmode); | |
569 | rtx tmp0 = gen_reg_rtx (SImode); | |
570 | rtx tmp1 = gen_reg_rtx (SImode); | |
19f8b229 TS |
571 | rtx_code_label *L_return = gen_label_rtx (); |
572 | rtx_code_label *L_loop_byte = gen_label_rtx (); | |
3a1a7897 CB |
573 | |
574 | rtx jump; | |
19f8b229 TS |
575 | rtx_code_label *L_loop_long = gen_label_rtx (); |
576 | rtx_code_label *L_end_loop_long = gen_label_rtx (); | |
3a1a7897 CB |
577 | |
578 | int align = INTVAL (operands[3]); | |
579 | ||
580 | emit_move_insn (operands[0], GEN_INT (-1)); | |
581 | ||
582 | /* remember start of string. */ | |
583 | emit_move_insn (start_addr, current_addr); | |
584 | ||
585 | if (align < 4) | |
586 | { | |
3b140613 | 587 | emit_insn (gen_tstsi_t (current_addr, GEN_INT (3))); |
3a1a7897 CB |
588 | jump = emit_jump_insn (gen_branch_false (L_loop_byte)); |
589 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); | |
590 | } | |
591 | ||
592 | emit_move_insn (tmp0, operands[2]); | |
593 | ||
594 | addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0); | |
595 | ||
36dc9ae8 | 596 | /* start long loop. */ |
3a1a7897 CB |
597 | emit_label (L_loop_long); |
598 | ||
599 | /* current_addr is aligned, OK to load a word into tmp1. */ | |
600 | emit_move_insn (tmp1, addr1); | |
601 | emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4)); | |
602 | ||
603 | /* Is there a 0 byte ? */ | |
604 | emit_insn (gen_cmpstr_t (tmp0, tmp1)); | |
ca494b8d | 605 | |
3a1a7897 CB |
606 | jump = emit_jump_insn (gen_branch_false (L_loop_long)); |
607 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); | |
608 | /* end loop. */ | |
ca494b8d | 609 | |
3a1a7897 | 610 | emit_label (L_end_loop_long); |
ca494b8d | 611 | |
3a1a7897 | 612 | emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4)); |
ca494b8d | 613 | |
3a1a7897 | 614 | addr1 = adjust_address (addr1, QImode, 0); |
ca494b8d | 615 | |
9447df74 | 616 | /* unrolled: re-check the 4 bytes of the last word loaded, one at a time. */ |
36dc9ae8 OE |
617 | for (int i = 0; i < 4; ++i) |
618 | { | |
619 | emit_insn (gen_extendqisi2 (tmp1, addr1)); | |
620 | emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1)); | |
621 | emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx)); | |
622 | jump = emit_jump_insn (gen_branch_true (L_return)); | |
623 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); | |
624 | } | |
9447df74 | 625 | |
9447df74 CB |
626 | emit_barrier_after (jump); |
627 | ||
628 | /* start byte loop. */ | |
3a1a7897 | 629 | emit_label (L_loop_byte); |
ca494b8d | 630 | |
3a1a7897 CB |
631 | emit_insn (gen_extendqisi2 (tmp1, addr1)); |
632 | emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1)); | |
ca494b8d | 633 | |
3a1a7897 CB |
634 | emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx)); |
635 | jump = emit_jump_insn (gen_branch_false (L_loop_byte)); | |
636 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); | |
ca494b8d | 637 | |
3a1a7897 | 638 | /* end loop. */ |
ca494b8d | 639 | |
9447df74 CB |
640 | emit_label (L_return); |
641 | ||
36dc9ae8 | 642 | emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1))); /* current_addr was already advanced past the byte just tested, so bias start_addr by one before subtracting. */ |
3a1a7897 | 643 | emit_insn (gen_subsi3 (operands[0], current_addr, start_addr)); |
ca494b8d | 644 | |
3a1a7897 | 645 | return true; |
ca494b8d | 646 | } |
fa1aecc1 | 647 | |
45731f37 | 648 | /* Emit code to perform a memset. |
fa1aecc1 CB |
649 | |
650 | OPERANDS[0] is the destination. | |
651 | OPERANDS[1] is the size. | |
652 | OPERANDS[2] is the value to store. | |
653 | OPERANDS[3] is the alignment. Returns early if the size is not a CONST_INT. */ | |
654 | void | |
655 | sh_expand_setmem (rtx *operands) | |
656 | { | |
19f8b229 TS |
657 | rtx_code_label *L_loop_byte = gen_label_rtx (); |
658 | rtx_code_label *L_loop_word = gen_label_rtx (); | |
659 | rtx_code_label *L_return = gen_label_rtx (); | |
fa1aecc1 CB |
660 | rtx jump; |
661 | rtx dest = copy_rtx (operands[0]); | |
662 | rtx dest_addr = copy_addr_to_reg (XEXP (dest, 0)); | |
663 | rtx val = force_reg (SImode, operands[2]); | |
664 | int align = INTVAL (operands[3]); | |
fa1aecc1 CB |
665 | rtx len = force_reg (SImode, operands[1]); |
666 | ||
667 | if (! CONST_INT_P (operands[1])) | |
668 | return; | |
669 | ||
45731f37 | 670 | int count = INTVAL (operands[1]); |
fa1aecc1 CB |
671 | |
672 | if (CONST_INT_P (operands[2]) | |
673 | && (INTVAL (operands[2]) == 0 || INTVAL (operands[2]) == -1) && count > 8) /* 0 and -1 have all bytes equal, so VAL can be stored a word at a time. */ | |
674 | { | |
675 | rtx lenw = gen_reg_rtx (SImode); | |
676 | ||
677 | if (align < 4) | |
45731f37 | 678 | { |
3b140613 | 679 | emit_insn (gen_tstsi_t (dest_addr, GEN_INT (3))); |
45731f37 OE |
680 | jump = emit_jump_insn (gen_branch_false (L_loop_byte)); |
681 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); | |
682 | } | |
fa1aecc1 | 683 | |
45731f37 | 684 | /* word count. Do we have iterations ? */ |
fa1aecc1 CB |
685 | emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2))); |
686 | ||
687 | dest = adjust_automodify_address (dest, SImode, dest_addr, 0); | |
688 | ||
689 | /* start word loop. */ | |
690 | emit_label (L_loop_word); | |
691 | ||
692 | if (TARGET_SH2) | |
693 | emit_insn (gen_dect (lenw, lenw)); | |
694 | else | |
45731f37 OE |
695 | { |
696 | emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1))); | |
697 | emit_insn (gen_tstsi_t (lenw, lenw)); | |
698 | } | |
fa1aecc1 CB |
699 | |
700 | emit_move_insn (dest, val); | |
701 | emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr, | |
45731f37 | 702 | GET_MODE_SIZE (SImode))); |
fa1aecc1 CB |
703 | |
704 | ||
705 | jump = emit_jump_insn (gen_branch_false (L_loop_word)); | |
706 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); | |
707 | count = count % 4; | |
708 | ||
709 | dest = adjust_address (dest, QImode, 0); | |
710 | ||
711 | val = gen_lowpart (QImode, val); | |
712 | ||
713 | while (count--) | |
45731f37 OE |
714 | { |
715 | emit_move_insn (dest, val); | |
716 | emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr, | |
717 | GET_MODE_SIZE (QImode))); | |
718 | } | |
fa1aecc1 CB |
719 | |
720 | jump = emit_jump_insn (gen_jump_compact (L_return)); | |
721 | emit_barrier_after (jump); | |
722 | } | |
723 | ||
724 | dest = adjust_automodify_address (dest, QImode, dest_addr, 0); | |
725 | ||
726 | /* start byte loop. */ | |
727 | emit_label (L_loop_byte); | |
728 | ||
729 | if (TARGET_SH2) | |
730 | emit_insn (gen_dect (len, len)); | |
731 | else | |
732 | { | |
733 | emit_insn (gen_addsi3 (len, len, GEN_INT (-1))); | |
734 | emit_insn (gen_tstsi_t (len, len)); | |
735 | } | |
736 | ||
737 | val = gen_lowpart (QImode, val); | |
738 | emit_move_insn (dest, val); | |
739 | emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr, | |
740 | GET_MODE_SIZE (QImode))); | |
741 | ||
742 | jump = emit_jump_insn (gen_branch_false (L_loop_byte)); | |
743 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); | |
744 | ||
745 | emit_label (L_return); | |
fa1aecc1 | 746 | } |