/* Helper routines for memory move and comparison insns.
   Copyright (C) 2013-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "function.h"
#include "basic-block.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "tm_p.h"
#include "emit-rtl.h"
#include "explow.h"
#include "expr.h"

/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}

/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */
bool
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (CONST_INT_P (operands[2]));
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  if (! constp)
    return false;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, 4);
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return true;
    }

  /* The remaining strategies require 4 byte alignment and a size that
     is a multiple of 4 bytes (a non-constant size already failed
     above).  */
  if (align < 4 || (bytes % 4 != 0))
    return false;

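  /* The __movmem* helpers below are special library functions
     (SFUNC_STATIC): the destination address is passed in r4 and the
     source address in r5, and the call goes through the helper's
     address loaded into FUNC_ADDR_RTX by function_symbol.  */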
  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return false;
      else if (bytes == 12)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  rtx lab = function_symbol (func_addr_rtx, "__movmemSI12_i4",
				     SFUNC_STATIC).lab;
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx, lab));
	  return true;
	}
      else if (! optimize_size)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  rtx lab = function_symbol (func_addr_rtx, bytes & 4
				     ? "__movmem_i4_odd"
				     : "__movmem_i4_even",
				     SFUNC_STATIC).lab;
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

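	  /* The helper presumably consumes r6 as the number of 8-byte
	     chunks minus one; the _odd variant copies the leftover
	     word (bytes & 4) on top of that.  */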
	  int dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab));
	  return true;
	}
      else
	return false;
    }
  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

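      /* Small constant sizes have dedicated __movmemSI<N> library
	 entry points (N being the byte count), so the length does not
	 need to be passed in a register.  */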
      sprintf (entry, "__movmemSI%d", bytes);
      rtx lab = function_symbol (func_addr_rtx, entry, SFUNC_STATIC).lab;
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx, lab));
      return true;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! optimize_size)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      rtx lab = function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC).lab;
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  E.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx, lab));
      return true;
    }

  return false;
}

static const int prob_unlikely = REG_BR_PROB_BASE / 10;
static const int prob_likely = REG_BR_PROB_BASE / 4;

/* Emit code to perform a strcmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the known alignment.  */
bool
sh_expand_cmpstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);
  rtx tmp3 = gen_reg_rtx (SImode);

  rtx jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
  const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;

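  /* tst #3 sets T iff the two low bits of the address are clear, i.e.
     iff the address is word aligned.  If either string may be
     misaligned, check at run time (OR-ing the addresses tests both in
     one insn) and fall back to the byte loop when the check fails.  */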
  if (addr1_alignment < 4 && addr2_alignment < 4)
    {
      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }
  else if (addr1_alignment < 4 && addr2_alignment >= 4)
    {
      emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }
  else if (addr1_alignment >= 4 && addr2_alignment < 4)
    {
      emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

  /* s2_addr is aligned, OK to load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  /* Start the long (word-at-a-time) loop.  */
  emit_label (L_loop_long);

  emit_move_insn (tmp2, tmp3);

  /* s1_addr is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));

  /* Is there a 0 byte?  */
  emit_insn (gen_andsi3 (tmp3, tmp3, tmp1));

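  /* The SH cmp/str insn sets T if any of the four byte pairs of its
     two operands are equal.  While the words compared so far are
     equal, a terminating 0 byte sits at the same position in both
     words, so it survives the AND above and the cmp/str below can
     spot it.  */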
  emit_insn (gen_cmpstr_t (tmp0, tmp3));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));

  /* s2_addr is aligned, OK to load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  jump = emit_jump_insn (gen_branch_true (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* End of the long loop.  */

  /* Fall through: subtract the words to form the result.  */
  if (TARGET_LITTLE_ENDIAN)
    {
      rtx low_1 = gen_lowpart (HImode, tmp1);
      rtx low_2 = gen_lowpart (HImode, tmp2);

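      /* On little-endian the first string byte is the least
	 significant byte of the loaded word, so reverse the bytes
	 before the subtraction below: each rotlhi3_8 swaps the two
	 bytes of the low half and rotlsi3_16 swaps the halves, which
	 together reverse all four bytes of tmp1 and tmp2.  */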
      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
      emit_insn (gen_rotlsi3_16 (tmp1, tmp1));
      emit_insn (gen_rotlsi3_16 (tmp2, tmp2));
      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
    }

  jump = emit_jump_insn (gen_jump_compact (L_return));
  emit_barrier_after (jump);

  emit_label (L_end_loop_long);

  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));

  /* Start the byte-at-a-time loop.  */
  addr1 = adjust_address (addr1, QImode, 0);
  addr2 = adjust_address (addr2, QImode, 0);

  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_true (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* End of the byte loop.  */

  emit_label (L_end_loop_byte);

  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_label (L_return);

  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  return true;
}

/* Emit code to perform a strncmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the length.
   OPERANDS[4] is the known alignment.  */
bool
sh_expand_cmpnstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);

  rtx jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();

  rtx len = force_reg (SImode, operands[3]);
  int constp = CONST_INT_P (operands[3]);

  const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
  const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;

  /* If the length is a compile-time constant, compare word by word in
     a counted loop, then finish the remaining bytes individually.  */
  if (constp)
    {
      rtx tmp0 = gen_reg_rtx (SImode);
      rtx tmp3 = gen_reg_rtx (SImode);
      rtx lenw = gen_reg_rtx (SImode);

      rtx_code_label *L_loop_long = gen_label_rtx ();
      rtx_code_label *L_end_loop_long = gen_label_rtx ();

      int bytes = INTVAL (operands[3]);
      int witers = bytes / 4;

      if (witers > 1)
	{
	  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
	  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

	  emit_move_insn (tmp0, const0_rtx);

	  if (addr1_alignment < 4 && addr2_alignment < 4)
	    {
	      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
	      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }
	  else if (addr1_alignment < 4 && addr2_alignment >= 4)
	    {
	      emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }
	  else if (addr1_alignment >= 4 && addr2_alignment < 4)
	    {
	      emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }

	  /* Word count.  Do we have iterations?  */
	  emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

	  /* Start the long (word-at-a-time) loop.  */
	  emit_label (L_loop_long);

	  /* s2_addr is aligned, OK to load.  */
	  emit_move_insn (tmp2, addr2);
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  GET_MODE_SIZE (SImode)));

	  /* s1_addr is aligned, OK to load.  */
	  emit_move_insn (tmp1, addr1);
	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  GET_MODE_SIZE (SImode)));

	  /* Is there a 0 byte?  */
	  emit_insn (gen_andsi3 (tmp3, tmp2, tmp1));

	  emit_insn (gen_cmpstr_t (tmp0, tmp3));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

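	  /* SH-2 and later have the dt insn, which decrements a
	     register and sets T when it hits zero in one step; on SH-1
	     decrement and test separately.  */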
	  if (TARGET_SH2)
	    emit_insn (gen_dect (lenw, lenw));
	  else
	    {
	      emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
	      emit_insn (gen_tstsi_t (lenw, lenw));
	    }

	  jump = emit_jump_insn (gen_branch_false (L_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

	  int sbytes = bytes % 4;

	  /* End of the long loop.  Reached max iterations.  */
	  if (sbytes == 0)
	    {
	      emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
	      jump = emit_jump_insn (gen_jump_compact (L_return));
	      emit_barrier_after (jump);
	    }
	  else
	    {
	      /* Remaining bytes to check.  */

	      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
	      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

	      while (sbytes--)
		{
		  emit_insn (gen_extendqisi2 (tmp1, addr1));
		  emit_insn (gen_extendqisi2 (tmp2, addr2));

		  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
		  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
		  if (flag_delayed_branch)
		    emit_insn (gen_zero_extendqisi2 (tmp2,
						     gen_lowpart (QImode,
								  tmp2)));
		  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  addr1 = adjust_address (addr1, QImode,
					  GET_MODE_SIZE (QImode));
		  addr2 = adjust_address (addr2, QImode,
					  GET_MODE_SIZE (QImode));
		}

	      jump = emit_jump_insn (gen_jump_compact (L_end_loop_byte));
	      emit_barrier_after (jump);
	    }

	  emit_label (L_end_loop_long);

	  /* Found the last word.  Restart the comparison byte by byte.  */

	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  -GET_MODE_SIZE (SImode)));
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  -GET_MODE_SIZE (SImode)));

	  /* Fall through.  */
	}

      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

      while (bytes--)
	{
	  emit_insn (gen_extendqisi2 (tmp1, addr1));
	  emit_insn (gen_extendqisi2 (tmp2, addr2));

	  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  if (flag_delayed_branch)
	    emit_insn (gen_zero_extendqisi2 (tmp2,
					     gen_lowpart (QImode, tmp2)));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode));
	  addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode));
	}

      jump = emit_jump_insn (gen_jump_compact (L_end_loop_byte));
      emit_barrier_after (jump);
    }
  else
    {
      emit_insn (gen_cmpeqsi_t (len, const0_rtx));
      emit_move_insn (operands[0], const0_rtx);
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    }

  addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* End of the byte loop.  */

  emit_label (L_end_loop_byte);

  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  emit_label (L_return);

  return true;
}

/* Emit code to perform a strlen.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the string.
   OPERANDS[2] is the char to search.
   OPERANDS[3] is the alignment.  */
bool
sh_expand_strlen (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();

  rtx jump;
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  int align = INTVAL (operands[3]);

  emit_move_insn (operands[0], GEN_INT (-1));

  /* Remember the start of the string.  */
  emit_move_insn (start_addr, current_addr);

  if (align < 4)
    {
      emit_insn (gen_tstsi_t (current_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

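  /* tmp0 holds the character to search for, which for strlen is the 0
     terminator, so all four bytes of tmp0 are zero and the cmp/str in
     the loop sets T as soon as the loaded word contains a 0 byte.  */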
  emit_move_insn (tmp0, operands[2]);

  addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0);

  /* Start the long (word-at-a-time) loop.  */
  emit_label (L_loop_long);

  /* current_addr is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4));

  /* Is there a 0 byte?  */
  emit_insn (gen_cmpstr_t (tmp0, tmp1));

  jump = emit_jump_insn (gen_branch_false (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* End of the long loop.  */

  emit_label (L_end_loop_long);

  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4));

  addr1 = adjust_address (addr1, QImode, 0);

  /* The 0 byte is known to be somewhere in the word just rewound;
     re-check its four bytes individually.  */
  for (int i = 0; i < 4; ++i)
    {
      emit_insn (gen_extendqisi2 (tmp1, addr1));
      emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
      emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  emit_barrier_after (jump);

  /* Start the byte-at-a-time loop, used when the start of the string
     is not word aligned.  */
  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  /* End of the byte loop.  */

  emit_label (L_return);

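  /* current_addr was advanced one past the 0 byte, so the length is
     current_addr - start_addr - 1; add 1 to start_addr and subtract.  */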
  emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1)));
  emit_insn (gen_subsi3 (operands[0], current_addr, start_addr));

  return true;
}

/* Emit code to perform a memset.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the size.
   OPERANDS[2] is the fill character.
   OPERANDS[3] is the alignment.  */
void
sh_expand_setmem (rtx *operands)
{
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_word = gen_label_rtx ();
  rtx_code_label *L_return = gen_label_rtx ();
  rtx jump;
  rtx dest = copy_rtx (operands[0]);
  rtx dest_addr = copy_addr_to_reg (XEXP (dest, 0));
  rtx val = force_reg (SImode, operands[2]);
  int align = INTVAL (operands[3]);
  rtx len = force_reg (SImode, operands[1]);

  if (! CONST_INT_P (operands[1]))
    return;

  int count = INTVAL (operands[1]);

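  /* A word-at-a-time fill is only correct if every byte of the SImode
     fill value equals the byte being stored, which holds exactly for
     0 and -1; any other value takes the byte loop below.  */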
  if (CONST_INT_P (operands[2])
      && (INTVAL (operands[2]) == 0 || INTVAL (operands[2]) == -1)
      && count > 8)
    {
      rtx lenw = gen_reg_rtx (SImode);

      if (align < 4)
	{
	  emit_insn (gen_tstsi_t (dest_addr, GEN_INT (3)));
	  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	}

      /* Word count.  Do we have iterations?  */
      emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

      dest = adjust_automodify_address (dest, SImode, dest_addr, 0);

      /* Start the word-at-a-time loop.  */
      emit_label (L_loop_word);

      if (TARGET_SH2)
	emit_insn (gen_dect (lenw, lenw));
      else
	{
	  emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
	  emit_insn (gen_tstsi_t (lenw, lenw));
	}

      emit_move_insn (dest, val);
      emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
						GET_MODE_SIZE (SImode)));

      jump = emit_jump_insn (gen_branch_false (L_loop_word));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
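
      /* At most three tail bytes remain after the word loop; store
	 them one at a time through the low byte of val.  */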
      count = count % 4;

      dest = adjust_address (dest, QImode, 0);

      val = gen_lowpart (QImode, val);

      while (count--)
	{
	  emit_move_insn (dest, val);
	  emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
						    GET_MODE_SIZE (QImode)));
	}

      jump = emit_jump_insn (gen_jump_compact (L_return));
      emit_barrier_after (jump);
    }

  dest = adjust_automodify_address (dest, QImode, dest_addr, 0);

  /* Start the byte-at-a-time loop.  */
  emit_label (L_loop_byte);

  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  val = gen_lowpart (QImode, val);
  emit_move_insn (dest, val);
  emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
					    GET_MODE_SIZE (QImode)));

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  emit_label (L_return);
}