]>
Commit | Line | Data |
---|---|---|
ccd57e8a | 1 | /* Helper routines for memory move and comparison insns. |
3aea1f79 | 2 | Copyright (C) 2013-2014 Free Software Foundation, Inc. |
ccd57e8a | 3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify | |
7 | it under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 3, or (at your option) | |
9 | any later version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING3. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #include "config.h" | |
21 | #include "system.h" | |
22 | #include "coretypes.h" | |
23 | #include "tm.h" | |
24 | #include "machmode.h" | |
25 | #include "rtl.h" | |
26 | #include "tree.h" | |
27 | #include "expr.h" | |
28 | #include "tm_p.h" | |
29 | #include "basic-block.h" | |
30 | ||
31 | /* Like force_operand, but guarantees that VALUE ends up in TARGET. */ | |
32 | static void | |
33 | force_into (rtx value, rtx target) | |
34 | { | |
35 | value = force_operand (value, target); | |
36 | if (! rtx_equal_p (value, target)) | |
37 | emit_insn (gen_move_insn (target, value)); | |
38 | } | |
39 | ||
40 | /* Emit code to perform a block move. Choose the best method. | |
41 | ||
42 | OPERANDS[0] is the destination. | |
43 | OPERANDS[1] is the source. | |
44 | OPERANDS[2] is the size. | |
45 | OPERANDS[3] is the alignment safe to use. */ | |
46 | bool | |
47 | expand_block_move (rtx *operands) | |
48 | { | |
49 | int align = INTVAL (operands[3]); | |
50 | int constp = (CONST_INT_P (operands[2])); | |
51 | int bytes = (constp ? INTVAL (operands[2]) : 0); | |
52 | ||
53 | if (! constp) | |
54 | return false; | |
55 | ||
56 | /* If we could use mov.l to move words and dest is word-aligned, we | |
57 | can use movua.l for loads and still generate a relatively short | |
58 | and efficient sequence. */ | |
59 | if (TARGET_SH4A_ARCH && align < 4 | |
60 | && MEM_ALIGN (operands[0]) >= 32 | |
61 | && can_move_by_pieces (bytes, 32)) | |
62 | { | |
63 | rtx dest = copy_rtx (operands[0]); | |
64 | rtx src = copy_rtx (operands[1]); | |
65 | /* We could use different pseudos for each copied word, but | |
66 | since movua can only load into r0, it's kind of | |
67 | pointless. */ | |
68 | rtx temp = gen_reg_rtx (SImode); | |
69 | rtx src_addr = copy_addr_to_reg (XEXP (src, 0)); | |
70 | int copied = 0; | |
71 | ||
72 | while (copied + 4 <= bytes) | |
73 | { | |
74 | rtx to = adjust_address (dest, SImode, copied); | |
75 | rtx from = adjust_automodify_address (src, BLKmode, | |
76 | src_addr, copied); | |
77 | ||
78 | set_mem_size (from, 4); | |
79 | emit_insn (gen_movua (temp, from)); | |
80 | emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4)); | |
81 | emit_move_insn (to, temp); | |
82 | copied += 4; | |
83 | } | |
84 | ||
85 | if (copied < bytes) | |
86 | move_by_pieces (adjust_address (dest, BLKmode, copied), | |
87 | adjust_automodify_address (src, BLKmode, | |
88 | src_addr, copied), | |
89 | bytes - copied, align, 0); | |
90 | ||
91 | return true; | |
92 | } | |
93 | ||
94 | /* If it isn't a constant number of bytes, or if it doesn't have 4 byte | |
95 | alignment, or if it isn't a multiple of 4 bytes, then fail. */ | |
96 | if (align < 4 || (bytes % 4 != 0)) | |
97 | return false; | |
98 | ||
99 | if (TARGET_HARD_SH4) | |
100 | { | |
101 | if (bytes < 12) | |
102 | return false; | |
103 | else if (bytes == 12) | |
104 | { | |
105 | rtx func_addr_rtx = gen_reg_rtx (Pmode); | |
106 | rtx r4 = gen_rtx_REG (SImode, 4); | |
107 | rtx r5 = gen_rtx_REG (SImode, 5); | |
108 | ||
109 | function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC); | |
110 | force_into (XEXP (operands[0], 0), r4); | |
111 | force_into (XEXP (operands[1], 0), r5); | |
112 | emit_insn (gen_block_move_real_i4 (func_addr_rtx)); | |
113 | return true; | |
114 | } | |
115 | else if (! optimize_size) | |
116 | { | |
117 | const char *entry_name; | |
118 | rtx func_addr_rtx = gen_reg_rtx (Pmode); | |
119 | int dwords; | |
120 | rtx r4 = gen_rtx_REG (SImode, 4); | |
121 | rtx r5 = gen_rtx_REG (SImode, 5); | |
122 | rtx r6 = gen_rtx_REG (SImode, 6); | |
123 | ||
124 | entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even"); | |
125 | function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC); | |
126 | force_into (XEXP (operands[0], 0), r4); | |
127 | force_into (XEXP (operands[1], 0), r5); | |
128 | ||
129 | dwords = bytes >> 3; | |
130 | emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1))); | |
131 | emit_insn (gen_block_lump_real_i4 (func_addr_rtx)); | |
132 | return true; | |
133 | } | |
134 | else | |
135 | return false; | |
136 | } | |
137 | if (bytes < 64) | |
138 | { | |
139 | char entry[30]; | |
140 | rtx func_addr_rtx = gen_reg_rtx (Pmode); | |
141 | rtx r4 = gen_rtx_REG (SImode, 4); | |
142 | rtx r5 = gen_rtx_REG (SImode, 5); | |
143 | ||
144 | sprintf (entry, "__movmemSI%d", bytes); | |
145 | function_symbol (func_addr_rtx, entry, SFUNC_STATIC); | |
146 | force_into (XEXP (operands[0], 0), r4); | |
147 | force_into (XEXP (operands[1], 0), r5); | |
148 | emit_insn (gen_block_move_real (func_addr_rtx)); | |
149 | return true; | |
150 | } | |
151 | ||
152 | /* This is the same number of bytes as a memcpy call, but to a different | |
153 | less common function name, so this will occasionally use more space. */ | |
154 | if (! optimize_size) | |
155 | { | |
156 | rtx func_addr_rtx = gen_reg_rtx (Pmode); | |
157 | int final_switch, while_loop; | |
158 | rtx r4 = gen_rtx_REG (SImode, 4); | |
159 | rtx r5 = gen_rtx_REG (SImode, 5); | |
160 | rtx r6 = gen_rtx_REG (SImode, 6); | |
161 | ||
162 | function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC); | |
163 | force_into (XEXP (operands[0], 0), r4); | |
164 | force_into (XEXP (operands[1], 0), r5); | |
165 | ||
166 | /* r6 controls the size of the move. 16 is decremented from it | |
167 | for each 64 bytes moved. Then the negative bit left over is used | |
168 | as an index into a list of move instructions. e.g., a 72 byte move | |
169 | would be set up with size(r6) = 14, for one iteration through the | |
170 | big while loop, and a switch of -2 for the last part. */ | |
171 | ||
172 | final_switch = 16 - ((bytes / 4) % 16); | |
173 | while_loop = ((bytes / 4) / 16 - 1) * 16; | |
174 | emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch))); | |
175 | emit_insn (gen_block_lump_real (func_addr_rtx)); | |
176 | return true; | |
177 | } | |
178 | ||
179 | return false; | |
180 | } | |
181 | ||
c2daae6e | 182 | static int prob_unlikely = REG_BR_PROB_BASE / 10; |
183 | static int prob_likely = REG_BR_PROB_BASE / 4; | |
184 | ||
ccd57e8a | 185 | /* Emit code to perform a strcmp. |
186 | ||
187 | OPERANDS[0] is the destination. | |
188 | OPERANDS[1] is the first string. | |
189 | OPERANDS[2] is the second string. | |
c2daae6e | 190 | OPERANDS[3] is the known alignment. */ |
ccd57e8a | 191 | bool |
192 | sh_expand_cmpstr (rtx *operands) | |
193 | { | |
c2daae6e | 194 | rtx addr1 = operands[1]; |
195 | rtx addr2 = operands[2]; | |
196 | rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0)); | |
197 | rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0)); | |
ccd57e8a | 198 | rtx tmp0 = gen_reg_rtx (SImode); |
199 | rtx tmp1 = gen_reg_rtx (SImode); | |
200 | rtx tmp2 = gen_reg_rtx (SImode); | |
201 | rtx tmp3 = gen_reg_rtx (SImode); | |
202 | ||
c2daae6e | 203 | rtx jump; |
ccd57e8a | 204 | rtx L_return = gen_label_rtx (); |
205 | rtx L_loop_byte = gen_label_rtx (); | |
206 | rtx L_end_loop_byte = gen_label_rtx (); | |
b421555d | 207 | rtx L_loop_long = gen_label_rtx (); |
208 | rtx L_end_loop_long = gen_label_rtx (); | |
ccd57e8a | 209 | |
b421555d | 210 | int align = INTVAL (operands[3]); |
ccd57e8a | 211 | |
212 | emit_move_insn (tmp0, const0_rtx); | |
213 | ||
b421555d | 214 | if (align < 4) |
215 | { | |
216 | emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr)); | |
217 | emit_insn (gen_tstsi_t (GEN_INT (3), tmp1)); | |
218 | jump = emit_jump_insn (gen_branch_false (L_loop_byte)); | |
219 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); | |
220 | } | |
ccd57e8a | 221 | |
c2daae6e | 222 | addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0); |
223 | addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0); | |
ccd57e8a | 224 | |
225 | /* tmp2 is aligned, OK to load. */ | |
226 | emit_move_insn (tmp3, addr2); | |
227 | emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4)); | |
228 | ||
229 | /*start long loop. */ | |
230 | emit_label (L_loop_long); | |
231 | ||
232 | emit_move_insn (tmp2, tmp3); | |
233 | ||
234 | /* tmp1 is aligned, OK to load. */ | |
235 | emit_move_insn (tmp1, addr1); | |
236 | emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4)); | |
237 | ||
238 | /* Is there a 0 byte ? */ | |
239 | emit_insn (gen_andsi3 (tmp3, tmp3, tmp1)); | |
240 | ||
241 | emit_insn (gen_cmpstr_t (tmp0, tmp3)); | |
242 | jump = emit_jump_insn (gen_branch_true (L_end_loop_long)); | |
243 | add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); | |
244 | ||
245 | emit_insn (gen_cmpeqsi_t (tmp1, tmp2)); | |
246 | ||
247 | /* tmp2 is aligned, OK to load. */ | |
248 | emit_move_insn (tmp3, addr2); | |
249 | emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4)); | |
250 | ||
251 | jump = emit_jump_insn (gen_branch_true (L_loop_long)); | |
252 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); | |
253 | /* end loop. */ | |
254 | ||
b421555d | 255 | /* Fallthu, substract words. */ |
ccd57e8a | 256 | if (TARGET_LITTLE_ENDIAN) |
257 | { | |
258 | rtx low_1 = gen_lowpart (HImode, tmp1); | |
259 | rtx low_2 = gen_lowpart (HImode, tmp2); | |
260 | ||
261 | emit_insn (gen_rotlhi3_8 (low_1, low_1)); | |
262 | emit_insn (gen_rotlhi3_8 (low_2, low_2)); | |
263 | emit_insn (gen_rotlsi3_16 (tmp1, tmp1)); | |
264 | emit_insn (gen_rotlsi3_16 (tmp2, tmp2)); | |
265 | emit_insn (gen_rotlhi3_8 (low_1, low_1)); | |
266 | emit_insn (gen_rotlhi3_8 (low_2, low_2)); | |
267 | } | |
268 | ||
269 | jump = emit_jump_insn (gen_jump_compact (L_return)); | |
270 | emit_barrier_after (jump); | |
271 | ||
ccd57e8a | 272 | emit_label (L_end_loop_long); |
273 | ||
274 | emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4)); | |
275 | emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4)); | |
276 | ||
b421555d | 277 | /* start byte loop. */ |
c2daae6e | 278 | addr1 = adjust_address (addr1, QImode, 0); |
279 | addr2 = adjust_address (addr2, QImode, 0); | |
b421555d | 280 | |
ccd57e8a | 281 | emit_label (L_loop_byte); |
282 | ||
283 | emit_insn (gen_extendqisi2 (tmp2, addr2)); | |
284 | emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1)); | |
285 | ||
286 | emit_insn (gen_extendqisi2 (tmp1, addr1)); | |
287 | emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1)); | |
288 | ||
289 | emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx)); | |
290 | jump = emit_jump_insn (gen_branch_true (L_end_loop_byte)); | |
291 | add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); | |
292 | ||
293 | emit_insn (gen_cmpeqsi_t (tmp1, tmp2)); | |
b421555d | 294 | if (flag_delayed_branch) |
295 | emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2))); | |
296 | jump = emit_jump_insn (gen_branch_true (L_loop_byte)); | |
ccd57e8a | 297 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); |
298 | /* end loop. */ | |
299 | ||
300 | emit_label (L_end_loop_byte); | |
301 | ||
b421555d | 302 | if (! flag_delayed_branch) |
303 | emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2))); | |
ccd57e8a | 304 | emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1))); |
305 | ||
306 | emit_label (L_return); | |
307 | ||
308 | emit_insn (gen_subsi3 (operands[0], tmp1, tmp2)); | |
309 | ||
310 | return true; | |
311 | } | |
312 | ||
b421555d | 313 | /* Emit code to perform a strncmp. |
314 | ||
315 | OPERANDS[0] is the destination. | |
316 | OPERANDS[1] is the first string. | |
317 | OPERANDS[2] is the second string. | |
318 | OPERANDS[3] is the length. | |
c2daae6e | 319 | OPERANDS[4] is the known alignment. */ |
b421555d | 320 | bool |
321 | sh_expand_cmpnstr (rtx *operands) | |
322 | { | |
c2daae6e | 323 | rtx addr1 = operands[1]; |
324 | rtx addr2 = operands[2]; | |
325 | rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0)); | |
326 | rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0)); | |
b421555d | 327 | rtx tmp1 = gen_reg_rtx (SImode); |
328 | rtx tmp2 = gen_reg_rtx (SImode); | |
329 | ||
c2daae6e | 330 | rtx jump; |
b421555d | 331 | rtx L_return = gen_label_rtx (); |
332 | rtx L_loop_byte = gen_label_rtx (); | |
333 | rtx L_end_loop_byte = gen_label_rtx (); | |
334 | ||
b421555d | 335 | rtx len = force_reg (SImode, operands[3]); |
75c9129c | 336 | int constp = CONST_INT_P (operands[3]); |
b421555d | 337 | |
75c9129c | 338 | /* Loop on a register count. */ |
339 | if (constp) | |
b421555d | 340 | { |
75c9129c | 341 | rtx tmp0 = gen_reg_rtx (SImode); |
b421555d | 342 | rtx tmp3 = gen_reg_rtx (SImode); |
343 | rtx lenw = gen_reg_rtx (SImode); | |
b421555d | 344 | |
75c9129c | 345 | rtx L_loop_long = gen_label_rtx (); |
346 | rtx L_end_loop_long = gen_label_rtx (); | |
b421555d | 347 | |
75c9129c | 348 | int align = INTVAL (operands[4]); |
349 | int bytes = INTVAL (operands[3]); | |
350 | int witers = bytes / 4; | |
351 | ||
352 | if (witers > 1) | |
353 | { | |
354 | addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0); | |
355 | addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0); | |
356 | ||
357 | emit_move_insn (tmp0, const0_rtx); | |
358 | ||
359 | if (align < 4) | |
360 | { | |
361 | emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr)); | |
362 | emit_insn (gen_tstsi_t (GEN_INT (3), tmp1)); | |
363 | jump = emit_jump_insn (gen_branch_false (L_loop_byte)); | |
364 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); | |
365 | } | |
366 | ||
367 | /* word count. Do we have iterations ? */ | |
368 | emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2))); | |
369 | ||
370 | /*start long loop. */ | |
371 | emit_label (L_loop_long); | |
372 | ||
373 | /* tmp2 is aligned, OK to load. */ | |
374 | emit_move_insn (tmp2, addr2); | |
375 | emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, | |
376 | GET_MODE_SIZE (SImode))); | |
377 | ||
378 | /* tmp1 is aligned, OK to load. */ | |
379 | emit_move_insn (tmp1, addr1); | |
380 | emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, | |
381 | GET_MODE_SIZE (SImode))); | |
382 | ||
383 | /* Is there a 0 byte ? */ | |
384 | emit_insn (gen_andsi3 (tmp3, tmp2, tmp1)); | |
385 | ||
386 | emit_insn (gen_cmpstr_t (tmp0, tmp3)); | |
387 | jump = emit_jump_insn (gen_branch_true (L_end_loop_long)); | |
388 | add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); | |
389 | ||
390 | emit_insn (gen_cmpeqsi_t (tmp1, tmp2)); | |
391 | jump = emit_jump_insn (gen_branch_false (L_end_loop_long)); | |
392 | add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); | |
393 | ||
394 | if (TARGET_SH2) | |
395 | emit_insn (gen_dect (lenw, lenw)); | |
396 | else | |
397 | { | |
398 | emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1))); | |
399 | emit_insn (gen_tstsi_t (lenw, lenw)); | |
400 | } | |
401 | ||
402 | jump = emit_jump_insn (gen_branch_false (L_loop_long)); | |
403 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); | |
404 | ||
42e035a5 | 405 | int sbytes = bytes % 4; |
406 | ||
75c9129c | 407 | /* end loop. Reached max iterations. */ |
42e035a5 | 408 | if (! sbytes) |
75c9129c | 409 | { |
75c9129c | 410 | jump = emit_jump_insn (gen_jump_compact (L_return)); |
411 | emit_barrier_after (jump); | |
412 | } | |
413 | else | |
414 | { | |
42e035a5 | 415 | /* Remaining bytes to check. */ |
416 | ||
417 | addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0); | |
418 | addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0); | |
419 | ||
420 | while (sbytes--) | |
421 | { | |
422 | emit_insn (gen_extendqisi2 (tmp1, addr1)); | |
423 | emit_insn (gen_extendqisi2 (tmp2, addr2)); | |
424 | ||
425 | emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx)); | |
426 | jump = emit_jump_insn (gen_branch_true (L_end_loop_byte)); | |
427 | add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); | |
428 | ||
429 | emit_insn (gen_cmpeqsi_t (tmp1, tmp2)); | |
430 | if (flag_delayed_branch) | |
431 | emit_insn (gen_zero_extendqisi2 (tmp2, | |
432 | gen_lowpart (QImode, | |
433 | tmp2))); | |
434 | jump = emit_jump_insn (gen_branch_false (L_end_loop_byte)); | |
435 | add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); | |
436 | ||
437 | addr1 = adjust_address (addr1, QImode, | |
438 | GET_MODE_SIZE (QImode)); | |
439 | addr2 = adjust_address (addr2, QImode, | |
440 | GET_MODE_SIZE (QImode)); | |
441 | } | |
442 | ||
443 | jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte)); | |
75c9129c | 444 | emit_barrier_after (jump); |
445 | } | |
446 | ||
447 | emit_label (L_end_loop_long); | |
448 | ||
449 | /* Found last word. Restart it byte per byte. */ | |
42e035a5 | 450 | |
75c9129c | 451 | emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, |
452 | -GET_MODE_SIZE (SImode))); | |
453 | emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, | |
454 | -GET_MODE_SIZE (SImode))); | |
75c9129c | 455 | |
42e035a5 | 456 | /* fall thru. */ |
457 | } | |
75c9129c | 458 | |
459 | addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0); | |
460 | addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0); | |
461 | ||
462 | while (bytes--) | |
463 | { | |
464 | emit_insn (gen_extendqisi2 (tmp1, addr1)); | |
465 | emit_insn (gen_extendqisi2 (tmp2, addr2)); | |
466 | ||
467 | emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx)); | |
468 | jump = emit_jump_insn (gen_branch_true (L_end_loop_byte)); | |
469 | add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); | |
470 | ||
471 | emit_insn (gen_cmpeqsi_t (tmp1, tmp2)); | |
472 | if (flag_delayed_branch) | |
42e035a5 | 473 | emit_insn (gen_zero_extendqisi2 (tmp2, |
474 | gen_lowpart (QImode, tmp2))); | |
75c9129c | 475 | jump = emit_jump_insn (gen_branch_false (L_end_loop_byte)); |
476 | add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); | |
477 | ||
478 | addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode)); | |
479 | addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode)); | |
480 | } | |
481 | ||
482 | jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte)); | |
483 | emit_barrier_after (jump); | |
b421555d | 484 | } |
485 | ||
75c9129c | 486 | addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0); |
487 | addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0); | |
c2daae6e | 488 | |
489 | emit_label (L_loop_byte); | |
490 | ||
491 | emit_insn (gen_extendqisi2 (tmp2, addr2)); | |
492 | emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1)); | |
493 | ||
494 | emit_insn (gen_extendqisi2 (tmp1, addr1)); | |
495 | emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1)); | |
496 | ||
497 | emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx)); | |
498 | jump = emit_jump_insn (gen_branch_true (L_end_loop_byte)); | |
499 | add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); | |
500 | ||
501 | emit_insn (gen_cmpeqsi_t (tmp1, tmp2)); | |
502 | if (flag_delayed_branch) | |
503 | emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2))); | |
504 | jump = emit_jump_insn (gen_branch_false (L_end_loop_byte)); | |
505 | add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); | |
506 | ||
507 | if (TARGET_SH2) | |
508 | emit_insn (gen_dect (len, len)); | |
509 | else | |
510 | { | |
511 | emit_insn (gen_addsi3 (len, len, GEN_INT (-1))); | |
512 | emit_insn (gen_tstsi_t (len, len)); | |
513 | } | |
514 | ||
515 | jump = emit_jump_insn (gen_branch_false (L_loop_byte)); | |
516 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); | |
517 | /* end byte loop. */ | |
518 | ||
519 | emit_label (L_end_loop_byte); | |
520 | ||
521 | if (! flag_delayed_branch) | |
522 | emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2))); | |
523 | emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1))); | |
524 | ||
525 | emit_label (L_return); | |
526 | ||
527 | emit_insn (gen_subsi3 (operands[0], tmp1, tmp2)); | |
528 | ||
529 | return true; | |
530 | } | |
531 | ||
532 | /* Emit code to perform a strlen | |
533 | ||
534 | OPERANDS[0] is the destination. | |
535 | OPERANDS[1] is the string. | |
536 | OPERANDS[2] is the char to search. | |
537 | OPERANDS[3] is the alignment. */ | |
538 | bool | |
539 | sh_expand_strlen (rtx *operands) | |
540 | { | |
541 | rtx addr1 = operands[1]; | |
542 | rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0)); | |
543 | rtx start_addr = gen_reg_rtx (Pmode); | |
544 | rtx tmp0 = gen_reg_rtx (SImode); | |
545 | rtx tmp1 = gen_reg_rtx (SImode); | |
546 | rtx L_return = gen_label_rtx (); | |
547 | rtx L_loop_byte = gen_label_rtx (); | |
548 | ||
549 | rtx jump; | |
550 | rtx L_loop_long = gen_label_rtx (); | |
551 | rtx L_end_loop_long = gen_label_rtx (); | |
552 | ||
553 | int align = INTVAL (operands[3]); | |
554 | ||
555 | emit_move_insn (operands[0], GEN_INT (-1)); | |
556 | ||
557 | /* remember start of string. */ | |
558 | emit_move_insn (start_addr, current_addr); | |
559 | ||
560 | if (align < 4) | |
561 | { | |
562 | emit_insn (gen_tstsi_t (GEN_INT (3), current_addr)); | |
563 | jump = emit_jump_insn (gen_branch_false (L_loop_byte)); | |
564 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); | |
565 | } | |
566 | ||
567 | emit_move_insn (tmp0, operands[2]); | |
568 | ||
569 | addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0); | |
570 | ||
571 | /*start long loop. */ | |
572 | emit_label (L_loop_long); | |
573 | ||
574 | /* tmp1 is aligned, OK to load. */ | |
575 | emit_move_insn (tmp1, addr1); | |
576 | emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4)); | |
577 | ||
578 | /* Is there a 0 byte ? */ | |
579 | emit_insn (gen_cmpstr_t (tmp0, tmp1)); | |
b421555d | 580 | |
c2daae6e | 581 | jump = emit_jump_insn (gen_branch_false (L_loop_long)); |
582 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); | |
583 | /* end loop. */ | |
b421555d | 584 | |
c2daae6e | 585 | emit_label (L_end_loop_long); |
b421555d | 586 | |
c2daae6e | 587 | emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4)); |
b421555d | 588 | |
c2daae6e | 589 | /* start byte loop. */ |
590 | addr1 = adjust_address (addr1, QImode, 0); | |
b421555d | 591 | |
c2daae6e | 592 | emit_label (L_loop_byte); |
b421555d | 593 | |
c2daae6e | 594 | emit_insn (gen_extendqisi2 (tmp1, addr1)); |
595 | emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1)); | |
b421555d | 596 | |
c2daae6e | 597 | emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx)); |
598 | jump = emit_jump_insn (gen_branch_false (L_loop_byte)); | |
599 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); | |
b421555d | 600 | |
c2daae6e | 601 | /* end loop. */ |
b421555d | 602 | |
c2daae6e | 603 | emit_label (L_return); |
b421555d | 604 | |
c2daae6e | 605 | emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1))); |
b421555d | 606 | |
c2daae6e | 607 | emit_insn (gen_subsi3 (operands[0], current_addr, start_addr)); |
b421555d | 608 | |
c2daae6e | 609 | return true; |
b421555d | 610 | } |
1878fb5b | 611 | |
612 | /* Emit code to perform a memset | |
613 | ||
614 | OPERANDS[0] is the destination. | |
615 | OPERANDS[1] is the size; | |
616 | OPERANDS[2] is the char to search. | |
617 | OPERANDS[3] is the alignment. */ | |
618 | void | |
619 | sh_expand_setmem (rtx *operands) | |
620 | { | |
621 | rtx L_loop_byte = gen_label_rtx (); | |
622 | rtx L_loop_word = gen_label_rtx (); | |
623 | rtx L_return = gen_label_rtx (); | |
624 | rtx jump; | |
625 | rtx dest = copy_rtx (operands[0]); | |
626 | rtx dest_addr = copy_addr_to_reg (XEXP (dest, 0)); | |
627 | rtx val = force_reg (SImode, operands[2]); | |
628 | int align = INTVAL (operands[3]); | |
629 | int count = 0; | |
630 | rtx len = force_reg (SImode, operands[1]); | |
631 | ||
632 | if (! CONST_INT_P (operands[1])) | |
633 | return; | |
634 | ||
635 | count = INTVAL (operands[1]); | |
636 | ||
637 | if (CONST_INT_P (operands[2]) | |
638 | && (INTVAL (operands[2]) == 0 || INTVAL (operands[2]) == -1) && count > 8) | |
639 | { | |
640 | rtx lenw = gen_reg_rtx (SImode); | |
641 | ||
642 | if (align < 4) | |
643 | { | |
644 | emit_insn (gen_tstsi_t (GEN_INT (3), dest_addr)); | |
645 | jump = emit_jump_insn (gen_branch_false (L_loop_byte)); | |
646 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); | |
647 | } | |
648 | ||
649 | /* word count. Do we have iterations ? */ | |
650 | emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2))); | |
651 | ||
652 | dest = adjust_automodify_address (dest, SImode, dest_addr, 0); | |
653 | ||
654 | /* start loop. */ | |
655 | emit_label (L_loop_word); | |
656 | ||
657 | if (TARGET_SH2) | |
658 | emit_insn (gen_dect (lenw, lenw)); | |
659 | else | |
660 | { | |
661 | emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1))); | |
662 | emit_insn (gen_tstsi_t (lenw, lenw)); | |
663 | } | |
664 | ||
665 | emit_move_insn (dest, val); | |
666 | emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr, | |
667 | GET_MODE_SIZE (SImode))); | |
668 | ||
669 | ||
670 | jump = emit_jump_insn (gen_branch_false (L_loop_word)); | |
671 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); | |
672 | count = count % 4; | |
673 | ||
674 | dest = adjust_address (dest, QImode, 0); | |
675 | ||
676 | val = gen_lowpart (QImode, val); | |
677 | ||
678 | while (count--) | |
679 | { | |
680 | emit_move_insn (dest, val); | |
681 | emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr, | |
682 | GET_MODE_SIZE (QImode))); | |
683 | } | |
684 | ||
685 | jump = emit_jump_insn (gen_jump_compact (L_return)); | |
686 | emit_barrier_after (jump); | |
687 | } | |
688 | ||
689 | dest = adjust_automodify_address (dest, QImode, dest_addr, 0); | |
690 | ||
691 | /* start loop. */ | |
692 | emit_label (L_loop_byte); | |
693 | ||
694 | if (TARGET_SH2) | |
695 | emit_insn (gen_dect (len, len)); | |
696 | else | |
697 | { | |
698 | emit_insn (gen_addsi3 (len, len, GEN_INT (-1))); | |
699 | emit_insn (gen_tstsi_t (len, len)); | |
700 | } | |
701 | ||
702 | val = gen_lowpart (QImode, val); | |
703 | emit_move_insn (dest, val); | |
704 | emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr, | |
705 | GET_MODE_SIZE (QImode))); | |
706 | ||
707 | jump = emit_jump_insn (gen_branch_false (L_loop_byte)); | |
708 | add_int_reg_note (jump, REG_BR_PROB, prob_likely); | |
709 | ||
710 | emit_label (L_return); | |
711 | ||
712 | return; | |
713 | } |