]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/sh/sh-mem.cc
gcc/ChangeLog
[thirdparty/gcc.git] / gcc / config / sh / sh-mem.cc
CommitLineData
ccd57e8a 1/* Helper routines for memory move and comparison insns.
d353bf18 2 Copyright (C) 2013-2015 Free Software Foundation, Inc.
ccd57e8a 3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 3, or (at your option)
9any later version.
10
11GCC is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "tm.h"
c1eb80de 24#include "function.h"
25#include "basic-block.h"
ccd57e8a 26#include "rtl.h"
27#include "tree.h"
c1eb80de 28#include "tm_p.h"
d53441c8 29#include "emit-rtl.h"
c1eb80de 30#include "explow.h"
ccd57e8a 31#include "expr.h"
ccd57e8a 32
33/* Like force_operand, but guarantees that VALUE ends up in TARGET. */
34static void
35force_into (rtx value, rtx target)
36{
37 value = force_operand (value, target);
38 if (! rtx_equal_p (value, target))
39 emit_insn (gen_move_insn (target, value));
40}
41
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.

   Returns true if a sequence was emitted; false tells the caller to
   fall back to its generic expansion.  */
bool
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (CONST_INT_P (operands[2]));
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  /* Only constant-size moves are handled here.  */
  if (! constp)
    return false;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      /* One SImode word per iteration: movua loads the possibly
	 unaligned source word into TEMP, then a plain store writes it
	 to the word-aligned destination.  */
      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, 4);
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      /* Copy the remaining tail (< 4 bytes) piecewise.  */
      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return true;
    }

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return false;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return false;
      else if (bytes == 12)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  /* The __movmem* library routines take the destination
	     address in r4 and the source address in r5.  */
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  rtx lab = function_symbol (func_addr_rtx, "__movmemSI12_i4",
				     SFUNC_STATIC).lab;
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx, lab));
	  return true;
	}
      else if (! optimize_size)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  /* r6 carries the double-word iteration count - 1.  */
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  /* An odd number of remaining words selects the _odd variant.  */
	  rtx lab = function_symbol (func_addr_rtx, bytes & 4
				     ? "__movmem_i4_odd"
				     : "__movmem_i4_even",
				     SFUNC_STATIC).lab;
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  int dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab));
	  return true;
	}
      else
	return false;
    }
  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      /* Call the size-specialized helper __movmemSI<bytes>.  */
      sprintf (entry, "__movmemSI%d", bytes);
      rtx lab = function_symbol (func_addr_rtx, entry, SFUNC_STATIC).lab;
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx, lab));
      return true;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! optimize_size)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      rtx lab = function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC).lab;
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx, lab));
      return true;
    }

  return false;
}
184
f4a24c51 185static const int prob_unlikely = REG_BR_PROB_BASE / 10;
186static const int prob_likely = REG_BR_PROB_BASE / 4;
c2daae6e 187
ccd57e8a 188/* Emit code to perform a strcmp.
189
190 OPERANDS[0] is the destination.
191 OPERANDS[1] is the first string.
192 OPERANDS[2] is the second string.
c2daae6e 193 OPERANDS[3] is the known alignment. */
ccd57e8a 194bool
195sh_expand_cmpstr (rtx *operands)
196{
c2daae6e 197 rtx addr1 = operands[1];
198 rtx addr2 = operands[2];
199 rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
200 rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
ccd57e8a 201 rtx tmp0 = gen_reg_rtx (SImode);
202 rtx tmp1 = gen_reg_rtx (SImode);
203 rtx tmp2 = gen_reg_rtx (SImode);
204 rtx tmp3 = gen_reg_rtx (SImode);
205
c2daae6e 206 rtx jump;
79f6a8ed 207 rtx_code_label *L_return = gen_label_rtx ();
208 rtx_code_label *L_loop_byte = gen_label_rtx ();
209 rtx_code_label *L_end_loop_byte = gen_label_rtx ();
210 rtx_code_label *L_loop_long = gen_label_rtx ();
211 rtx_code_label *L_end_loop_long = gen_label_rtx ();
ccd57e8a 212
b0d054a9 213 const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
214 const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;
ccd57e8a 215
b0d054a9 216 if (addr1_alignment < 4 && addr2_alignment < 4)
b421555d 217 {
218 emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
f07efc97 219 emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
b421555d 220 jump = emit_jump_insn (gen_branch_false (L_loop_byte));
221 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
222 }
b0d054a9 223 else if (addr1_alignment < 4 && addr2_alignment >= 4)
224 {
225 emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
226 jump = emit_jump_insn (gen_branch_false (L_loop_byte));
227 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
228 }
229 else if (addr1_alignment >= 4 && addr2_alignment < 4)
230 {
231 emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
232 jump = emit_jump_insn (gen_branch_false (L_loop_byte));
233 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
234 }
ccd57e8a 235
c2daae6e 236 addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
237 addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);
ccd57e8a 238
239 /* tmp2 is aligned, OK to load. */
240 emit_move_insn (tmp3, addr2);
241 emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));
242
f4a24c51 243 /* start long loop. */
ccd57e8a 244 emit_label (L_loop_long);
245
246 emit_move_insn (tmp2, tmp3);
247
248 /* tmp1 is aligned, OK to load. */
249 emit_move_insn (tmp1, addr1);
250 emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));
251
252 /* Is there a 0 byte ? */
253 emit_insn (gen_andsi3 (tmp3, tmp3, tmp1));
254
255 emit_insn (gen_cmpstr_t (tmp0, tmp3));
256 jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
257 add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
258
259 emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
260
261 /* tmp2 is aligned, OK to load. */
262 emit_move_insn (tmp3, addr2);
263 emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));
264
265 jump = emit_jump_insn (gen_branch_true (L_loop_long));
266 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
267 /* end loop. */
268
b421555d 269 /* Fallthu, substract words. */
ccd57e8a 270 if (TARGET_LITTLE_ENDIAN)
271 {
272 rtx low_1 = gen_lowpart (HImode, tmp1);
273 rtx low_2 = gen_lowpart (HImode, tmp2);
274
275 emit_insn (gen_rotlhi3_8 (low_1, low_1));
276 emit_insn (gen_rotlhi3_8 (low_2, low_2));
277 emit_insn (gen_rotlsi3_16 (tmp1, tmp1));
278 emit_insn (gen_rotlsi3_16 (tmp2, tmp2));
279 emit_insn (gen_rotlhi3_8 (low_1, low_1));
280 emit_insn (gen_rotlhi3_8 (low_2, low_2));
281 }
282
283 jump = emit_jump_insn (gen_jump_compact (L_return));
284 emit_barrier_after (jump);
285
ccd57e8a 286 emit_label (L_end_loop_long);
287
288 emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
289 emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));
290
b421555d 291 /* start byte loop. */
c2daae6e 292 addr1 = adjust_address (addr1, QImode, 0);
293 addr2 = adjust_address (addr2, QImode, 0);
b421555d 294
ccd57e8a 295 emit_label (L_loop_byte);
296
297 emit_insn (gen_extendqisi2 (tmp2, addr2));
298 emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));
299
300 emit_insn (gen_extendqisi2 (tmp1, addr1));
301 emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));
302
303 emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
304 jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
305 add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
306
307 emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
b421555d 308 if (flag_delayed_branch)
309 emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
310 jump = emit_jump_insn (gen_branch_true (L_loop_byte));
ccd57e8a 311 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
312 /* end loop. */
313
314 emit_label (L_end_loop_byte);
315
b421555d 316 if (! flag_delayed_branch)
317 emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
ccd57e8a 318 emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));
319
320 emit_label (L_return);
321
322 emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
323
324 return true;
325}
326
b421555d 327/* Emit code to perform a strncmp.
328
329 OPERANDS[0] is the destination.
330 OPERANDS[1] is the first string.
331 OPERANDS[2] is the second string.
332 OPERANDS[3] is the length.
c2daae6e 333 OPERANDS[4] is the known alignment. */
b421555d 334bool
335sh_expand_cmpnstr (rtx *operands)
336{
c2daae6e 337 rtx addr1 = operands[1];
338 rtx addr2 = operands[2];
339 rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
340 rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
b421555d 341 rtx tmp1 = gen_reg_rtx (SImode);
342 rtx tmp2 = gen_reg_rtx (SImode);
343
c2daae6e 344 rtx jump;
79f6a8ed 345 rtx_code_label *L_return = gen_label_rtx ();
346 rtx_code_label *L_loop_byte = gen_label_rtx ();
347 rtx_code_label *L_end_loop_byte = gen_label_rtx ();
b421555d 348
b421555d 349 rtx len = force_reg (SImode, operands[3]);
75c9129c 350 int constp = CONST_INT_P (operands[3]);
b421555d 351
b0d054a9 352 const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
353 const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;
354
f4a24c51 355 /* Loop on a register count. */
75c9129c 356 if (constp)
b421555d 357 {
75c9129c 358 rtx tmp0 = gen_reg_rtx (SImode);
b421555d 359 rtx tmp3 = gen_reg_rtx (SImode);
360 rtx lenw = gen_reg_rtx (SImode);
b421555d 361
79f6a8ed 362 rtx_code_label *L_loop_long = gen_label_rtx ();
363 rtx_code_label *L_end_loop_long = gen_label_rtx ();
b421555d 364
75c9129c 365 int bytes = INTVAL (operands[3]);
366 int witers = bytes / 4;
367
368 if (witers > 1)
f4a24c51 369 {
370 addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
371 addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);
372
373 emit_move_insn (tmp0, const0_rtx);
374
b0d054a9 375 if (addr1_alignment < 4 && addr2_alignment < 4)
f4a24c51 376 {
377 emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
f07efc97 378 emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
f4a24c51 379 jump = emit_jump_insn (gen_branch_false (L_loop_byte));
380 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
381 }
b0d054a9 382 else if (addr1_alignment < 4 && addr2_alignment >= 4)
383 {
384 emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
385 jump = emit_jump_insn (gen_branch_false (L_loop_byte));
386 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
387 }
388 else if (addr1_alignment >= 4 && addr2_alignment < 4)
389 {
390 emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
391 jump = emit_jump_insn (gen_branch_false (L_loop_byte));
392 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
393 }
f4a24c51 394
395 /* word count. Do we have iterations ? */
396 emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));
397
398 /* start long loop. */
399 emit_label (L_loop_long);
400
401 /* tmp2 is aligned, OK to load. */
402 emit_move_insn (tmp2, addr2);
403 emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
404 GET_MODE_SIZE (SImode)));
405
406 /* tmp1 is aligned, OK to load. */
407 emit_move_insn (tmp1, addr1);
408 emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
409 GET_MODE_SIZE (SImode)));
410
411 /* Is there a 0 byte ? */
412 emit_insn (gen_andsi3 (tmp3, tmp2, tmp1));
413
414 emit_insn (gen_cmpstr_t (tmp0, tmp3));
415 jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
416 add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
417
418 emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
419 jump = emit_jump_insn (gen_branch_false (L_end_loop_long));
420 add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
421
422 if (TARGET_SH2)
423 emit_insn (gen_dect (lenw, lenw));
424 else
425 {
426 emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
427 emit_insn (gen_tstsi_t (lenw, lenw));
428 }
429
430 jump = emit_jump_insn (gen_branch_false (L_loop_long));
431 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
432
433 int sbytes = bytes % 4;
434
435 /* end loop. Reached max iterations. */
436 if (sbytes == 0)
437 {
7756601d 438 emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
f4a24c51 439 jump = emit_jump_insn (gen_jump_compact (L_return));
440 emit_barrier_after (jump);
441 }
442 else
443 {
444 /* Remaining bytes to check. */
445
446 addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
447 addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);
448
449 while (sbytes--)
450 {
451 emit_insn (gen_extendqisi2 (tmp1, addr1));
452 emit_insn (gen_extendqisi2 (tmp2, addr2));
453
454 emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
455 jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
456 add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
457
458 emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
459 if (flag_delayed_branch)
460 emit_insn (gen_zero_extendqisi2 (tmp2,
461 gen_lowpart (QImode,
462 tmp2)));
463 jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
464 add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
465
466 addr1 = adjust_address (addr1, QImode,
467 GET_MODE_SIZE (QImode));
468 addr2 = adjust_address (addr2, QImode,
469 GET_MODE_SIZE (QImode));
470 }
471
472 jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
473 emit_barrier_after (jump);
474 }
475
476 emit_label (L_end_loop_long);
477
478 /* Found last word. Restart it byte per byte. */
479
480 emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
481 -GET_MODE_SIZE (SImode)));
482 emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
483 -GET_MODE_SIZE (SImode)));
484
485 /* fall thru. */
486 }
75c9129c 487
488 addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
489 addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);
490
491 while (bytes--)
f4a24c51 492 {
493 emit_insn (gen_extendqisi2 (tmp1, addr1));
494 emit_insn (gen_extendqisi2 (tmp2, addr2));
495
496 emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
497 jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
498 add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
499
500 emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
501 if (flag_delayed_branch)
502 emit_insn (gen_zero_extendqisi2 (tmp2,
503 gen_lowpart (QImode, tmp2)));
504 jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
505 add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
506
507 addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode));
508 addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode));
509 }
75c9129c 510
511 jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
512 emit_barrier_after (jump);
b421555d 513 }
7756601d 514 else
515 {
516 emit_insn (gen_cmpeqsi_t (len, const0_rtx));
517 emit_move_insn (operands[0], const0_rtx);
518 jump = emit_jump_insn (gen_branch_true (L_return));
519 add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
520 }
b421555d 521
75c9129c 522 addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
523 addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);
c2daae6e 524
525 emit_label (L_loop_byte);
526
527 emit_insn (gen_extendqisi2 (tmp2, addr2));
528 emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));
529
530 emit_insn (gen_extendqisi2 (tmp1, addr1));
531 emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));
532
533 emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
534 jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
535 add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
536
537 emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
538 if (flag_delayed_branch)
539 emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
540 jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
541 add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
542
543 if (TARGET_SH2)
544 emit_insn (gen_dect (len, len));
545 else
546 {
547 emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
548 emit_insn (gen_tstsi_t (len, len));
549 }
550
551 jump = emit_jump_insn (gen_branch_false (L_loop_byte));
552 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
553 /* end byte loop. */
554
555 emit_label (L_end_loop_byte);
556
557 if (! flag_delayed_branch)
558 emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
559 emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));
560
c2daae6e 561 emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
562
7756601d 563 emit_label (L_return);
564
c2daae6e 565 return true;
566}
567
f4a24c51 568/* Emit code to perform a strlen.
c2daae6e 569
570 OPERANDS[0] is the destination.
571 OPERANDS[1] is the string.
572 OPERANDS[2] is the char to search.
573 OPERANDS[3] is the alignment. */
574bool
575sh_expand_strlen (rtx *operands)
576{
577 rtx addr1 = operands[1];
578 rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0));
579 rtx start_addr = gen_reg_rtx (Pmode);
580 rtx tmp0 = gen_reg_rtx (SImode);
581 rtx tmp1 = gen_reg_rtx (SImode);
79f6a8ed 582 rtx_code_label *L_return = gen_label_rtx ();
583 rtx_code_label *L_loop_byte = gen_label_rtx ();
c2daae6e 584
585 rtx jump;
79f6a8ed 586 rtx_code_label *L_loop_long = gen_label_rtx ();
587 rtx_code_label *L_end_loop_long = gen_label_rtx ();
c2daae6e 588
589 int align = INTVAL (operands[3]);
590
591 emit_move_insn (operands[0], GEN_INT (-1));
592
593 /* remember start of string. */
594 emit_move_insn (start_addr, current_addr);
595
596 if (align < 4)
597 {
f07efc97 598 emit_insn (gen_tstsi_t (current_addr, GEN_INT (3)));
c2daae6e 599 jump = emit_jump_insn (gen_branch_false (L_loop_byte));
600 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
601 }
602
603 emit_move_insn (tmp0, operands[2]);
604
605 addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0);
606
9c00010f 607 /* start long loop. */
c2daae6e 608 emit_label (L_loop_long);
609
610 /* tmp1 is aligned, OK to load. */
611 emit_move_insn (tmp1, addr1);
612 emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4));
613
614 /* Is there a 0 byte ? */
615 emit_insn (gen_cmpstr_t (tmp0, tmp1));
b421555d 616
c2daae6e 617 jump = emit_jump_insn (gen_branch_false (L_loop_long));
618 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
619 /* end loop. */
b421555d 620
c2daae6e 621 emit_label (L_end_loop_long);
b421555d 622
c2daae6e 623 emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4));
b421555d 624
c2daae6e 625 addr1 = adjust_address (addr1, QImode, 0);
b421555d 626
751d4d6f 627 /* unroll remaining bytes. */
9c00010f 628 for (int i = 0; i < 4; ++i)
629 {
630 emit_insn (gen_extendqisi2 (tmp1, addr1));
631 emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
632 emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
633 jump = emit_jump_insn (gen_branch_true (L_return));
634 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
635 }
751d4d6f 636
751d4d6f 637 emit_barrier_after (jump);
638
639 /* start byte loop. */
c2daae6e 640 emit_label (L_loop_byte);
b421555d 641
c2daae6e 642 emit_insn (gen_extendqisi2 (tmp1, addr1));
643 emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
b421555d 644
c2daae6e 645 emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
646 jump = emit_jump_insn (gen_branch_false (L_loop_byte));
647 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
b421555d 648
c2daae6e 649 /* end loop. */
b421555d 650
751d4d6f 651 emit_label (L_return);
652
9c00010f 653 emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1)));
c2daae6e 654 emit_insn (gen_subsi3 (operands[0], current_addr, start_addr));
b421555d 655
c2daae6e 656 return true;
b421555d 657}
1878fb5b 658
f4a24c51 659/* Emit code to perform a memset.
1878fb5b 660
661 OPERANDS[0] is the destination.
662 OPERANDS[1] is the size;
663 OPERANDS[2] is the char to search.
664 OPERANDS[3] is the alignment. */
665void
666sh_expand_setmem (rtx *operands)
667{
79f6a8ed 668 rtx_code_label *L_loop_byte = gen_label_rtx ();
669 rtx_code_label *L_loop_word = gen_label_rtx ();
670 rtx_code_label *L_return = gen_label_rtx ();
1878fb5b 671 rtx jump;
672 rtx dest = copy_rtx (operands[0]);
673 rtx dest_addr = copy_addr_to_reg (XEXP (dest, 0));
674 rtx val = force_reg (SImode, operands[2]);
675 int align = INTVAL (operands[3]);
1878fb5b 676 rtx len = force_reg (SImode, operands[1]);
677
678 if (! CONST_INT_P (operands[1]))
679 return;
680
f4a24c51 681 int count = INTVAL (operands[1]);
1878fb5b 682
683 if (CONST_INT_P (operands[2])
684 && (INTVAL (operands[2]) == 0 || INTVAL (operands[2]) == -1) && count > 8)
685 {
686 rtx lenw = gen_reg_rtx (SImode);
687
688 if (align < 4)
f4a24c51 689 {
f07efc97 690 emit_insn (gen_tstsi_t (dest_addr, GEN_INT (3)));
f4a24c51 691 jump = emit_jump_insn (gen_branch_false (L_loop_byte));
692 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
693 }
1878fb5b 694
f4a24c51 695 /* word count. Do we have iterations ? */
1878fb5b 696 emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));
697
698 dest = adjust_automodify_address (dest, SImode, dest_addr, 0);
699
700 /* start loop. */
701 emit_label (L_loop_word);
702
703 if (TARGET_SH2)
704 emit_insn (gen_dect (lenw, lenw));
705 else
f4a24c51 706 {
707 emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
708 emit_insn (gen_tstsi_t (lenw, lenw));
709 }
1878fb5b 710
711 emit_move_insn (dest, val);
712 emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
f4a24c51 713 GET_MODE_SIZE (SImode)));
1878fb5b 714
715
716 jump = emit_jump_insn (gen_branch_false (L_loop_word));
717 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
718 count = count % 4;
719
720 dest = adjust_address (dest, QImode, 0);
721
722 val = gen_lowpart (QImode, val);
723
724 while (count--)
f4a24c51 725 {
726 emit_move_insn (dest, val);
727 emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
728 GET_MODE_SIZE (QImode)));
729 }
1878fb5b 730
731 jump = emit_jump_insn (gen_jump_compact (L_return));
732 emit_barrier_after (jump);
733 }
734
735 dest = adjust_automodify_address (dest, QImode, dest_addr, 0);
736
737 /* start loop. */
738 emit_label (L_loop_byte);
739
740 if (TARGET_SH2)
741 emit_insn (gen_dect (len, len));
742 else
743 {
744 emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
745 emit_insn (gen_tstsi_t (len, len));
746 }
747
748 val = gen_lowpart (QImode, val);
749 emit_move_insn (dest, val);
750 emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
751 GET_MODE_SIZE (QImode)));
752
753 jump = emit_jump_insn (gen_branch_false (L_loop_byte));
754 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
755
756 emit_label (L_return);
1878fb5b 757}