From: Christian Bruel Date: Wed, 6 Nov 2013 08:49:15 +0000 (+0100) Subject: sh-mem.cc (sh_expand_cmpnstr, [...]): Factorize probabilities... X-Git-Tag: releases/gcc-4.9.0~3002 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3a1a7897b7376b8634dfc4862540b02c691b626d;p=thirdparty%2Fgcc.git sh-mem.cc (sh_expand_cmpnstr, [...]): Factorize probabilities... 2013-11-06 Christian Bruel * gcc/config/sh/sh-mem.cc (sh_expand_cmpnstr, sh_expand_cmpstr): Factorize probabilities, Use adjust_address instead of adjust_automodify_address when possible. Enable for optimize. (sh_expand_strlen): New function. * gcc/config/sh/sh-protos.h (sh_expand_strlen): Declare. * gcc/config/sh/sh.md (strlensi): New pattern. (UNSPEC_BUILTIN_STRLEN): Define. From-SVN: r204445 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3b468540b621..24d25df4d069 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2013-11-06 Christian Bruel + + * gcc/config/sh/sh-mem.cc (sh_expand_cmpnstr, sh_expand_cmpstr): + Factorize probabilities, Use adjust_address instead of + adjust_automodify_address when possible. Enable for optimize. + (sh_expand_strlen): New function. + * gcc/config/sh/sh-protos.h (sh_expand_strlen): Declare. + * gcc/config/sh/sh.md (strlensi): New pattern. + (UNSPEC_BUILTIN_STRLEN): Define. + 2013-11-06 Jakub Jelinek PR middle-end/58970 diff --git a/gcc/config/sh/sh-mem.cc b/gcc/config/sh/sh-mem.cc index eabf68711737..729e848586df 100644 --- a/gcc/config/sh/sh-mem.cc +++ b/gcc/config/sh/sh-mem.cc @@ -179,32 +179,31 @@ expand_block_move (rtx *operands) return false; } +static int prob_unlikely = REG_BR_PROB_BASE / 10; +static int prob_likely = REG_BR_PROB_BASE / 4; + /* Emit code to perform a strcmp. OPERANDS[0] is the destination. OPERANDS[1] is the first string. OPERANDS[2] is the second string. - OPERANDS[3] is the align. */ + OPERANDS[3] is the known alignment. */ bool sh_expand_cmpstr (rtx *operands) { - rtx s1 = copy_rtx (operands[1]); - rtx s2 = copy_rtx (operands[2]); - rtx s1_addr = copy_addr_to_reg (XEXP (s1, 0)); - rtx s2_addr = copy_addr_to_reg (XEXP (s2, 0)); + rtx addr1 = operands[1]; + rtx addr2 = operands[2]; + rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0)); + rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0)); rtx tmp0 = gen_reg_rtx (SImode); rtx tmp1 = gen_reg_rtx (SImode); rtx tmp2 = gen_reg_rtx (SImode); rtx tmp3 = gen_reg_rtx (SImode); + rtx jump; rtx L_return = gen_label_rtx (); rtx L_loop_byte = gen_label_rtx (); rtx L_end_loop_byte = gen_label_rtx (); - - rtx jump, addr1, addr2; - int prob_unlikely = REG_BR_PROB_BASE / 10; - int prob_likely = REG_BR_PROB_BASE / 4; - rtx L_loop_long = gen_label_rtx (); rtx L_end_loop_long = gen_label_rtx (); @@ -220,8 +219,8 @@ sh_expand_cmpstr (rtx *operands) add_int_reg_note (jump, REG_BR_PROB, prob_likely); } - addr1 = adjust_automodify_address (s1, SImode, s1_addr, 0); - addr2 = adjust_automodify_address (s2, SImode, s2_addr, 0); + addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0); + addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0); /* tmp2 is aligned, OK to load. */ emit_move_insn (tmp3, addr2); @@ -276,8 +275,8 @@ sh_expand_cmpstr (rtx *operands) emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4)); /* start byte loop. */ - addr1 = adjust_automodify_address (s1, QImode, s1_addr, 0); - addr2 = adjust_automodify_address (s2, QImode, s2_addr, 0); + addr1 = adjust_address (addr1, QImode, 0); + addr2 = adjust_address (addr2, QImode, 0); emit_label (L_loop_byte); @@ -317,27 +316,23 @@ sh_expand_cmpstr (rtx *operands) OPERANDS[1] is the first string. OPERANDS[2] is the second string. OPERANDS[3] is the length. - OPERANDS[4] is the align. */ + OPERANDS[4] is the known alignment. */ bool sh_expand_cmpnstr (rtx *operands) { - rtx s1 = copy_rtx (operands[1]); - rtx s2 = copy_rtx (operands[2]); - - rtx s1_addr = copy_addr_to_reg (XEXP (s1, 0)); - rtx s2_addr = copy_addr_to_reg (XEXP (s2, 0)); + rtx addr1 = operands[1]; + rtx addr2 = operands[2]; + rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0)); + rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0)); rtx tmp0 = gen_reg_rtx (SImode); rtx tmp1 = gen_reg_rtx (SImode); rtx tmp2 = gen_reg_rtx (SImode); + rtx jump; rtx L_return = gen_label_rtx (); rtx L_loop_byte = gen_label_rtx (); rtx L_end_loop_byte = gen_label_rtx (); - rtx jump, addr1, addr2; - int prob_unlikely = REG_BR_PROB_BASE / 10; - int prob_likely = REG_BR_PROB_BASE / 4; - rtx len = force_reg (SImode, operands[3]); int constp = (CONST_INT_P (operands[3])); int bytes = (constp ? INTVAL (operands[3]) : 0); @@ -366,10 +361,10 @@ sh_expand_cmpnstr (rtx *operands) add_int_reg_note (jump, REG_BR_PROB, prob_likely); } - addr1 = adjust_automodify_address (s1, SImode, s1_addr, 0); - addr2 = adjust_automodify_address (s2, SImode, s2_addr, 0); + addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0); + addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0); - /* words count. */ + /* word count. Do we have iterations ? */ emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2))); /*start long loop. */ @@ -429,48 +424,128 @@ sh_expand_cmpnstr (rtx *operands) emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4)); } - addr1 = adjust_automodify_address (s1, QImode, s1_addr, 0); - addr2 = adjust_automodify_address (s2, QImode, s2_addr, 0); + addr1 = adjust_address (addr1, QImode, 0); + addr2 = adjust_address (addr2, QImode, 0); + + emit_label (L_loop_byte); + + emit_insn (gen_extendqisi2 (tmp2, addr2)); + emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1)); + + emit_insn (gen_extendqisi2 (tmp1, addr1)); + emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1)); + + emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx)); + jump = emit_jump_insn (gen_branch_true (L_end_loop_byte)); + add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); + + emit_insn (gen_cmpeqsi_t (tmp1, tmp2)); + if (flag_delayed_branch) + emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2))); + jump = emit_jump_insn (gen_branch_false (L_end_loop_byte)); + add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); + + if (TARGET_SH2) + emit_insn (gen_dect (len, len)); + else + { + emit_insn (gen_addsi3 (len, len, GEN_INT (-1))); + emit_insn (gen_tstsi_t (len, len)); + } + + jump = emit_jump_insn (gen_branch_false (L_loop_byte)); + add_int_reg_note (jump, REG_BR_PROB, prob_likely); + /* end byte loop. */ + + emit_label (L_end_loop_byte); + + if (! flag_delayed_branch) + emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2))); + emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1))); + + emit_label (L_return); + + emit_insn (gen_subsi3 (operands[0], tmp1, tmp2)); + + return true; +} + +/* Emit code to perform a strlen + + OPERANDS[0] is the destination. + OPERANDS[1] is the string. + OPERANDS[2] is the char to search. + OPERANDS[3] is the alignment. */ +bool +sh_expand_strlen (rtx *operands) +{ + rtx addr1 = operands[1]; + rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0)); + rtx start_addr = gen_reg_rtx (Pmode); + rtx tmp0 = gen_reg_rtx (SImode); + rtx tmp1 = gen_reg_rtx (SImode); + rtx L_return = gen_label_rtx (); + rtx L_loop_byte = gen_label_rtx (); + + rtx jump; + rtx L_loop_long = gen_label_rtx (); + rtx L_end_loop_long = gen_label_rtx (); + + int align = INTVAL (operands[3]); + + emit_move_insn (operands[0], GEN_INT (-1)); + + /* remember start of string. */ + emit_move_insn (start_addr, current_addr); + + if (align < 4) + { + emit_insn (gen_tstsi_t (GEN_INT (3), current_addr)); + jump = emit_jump_insn (gen_branch_false (L_loop_byte)); + add_int_reg_note (jump, REG_BR_PROB, prob_likely); + } + + emit_move_insn (tmp0, operands[2]); + + addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0); + + /*start long loop. */ + emit_label (L_loop_long); + + /* tmp1 is aligned, OK to load. */ + emit_move_insn (tmp1, addr1); + emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4)); + + /* Is there a 0 byte ? */ + emit_insn (gen_cmpstr_t (tmp0, tmp1)); - emit_label (L_loop_byte); + jump = emit_jump_insn (gen_branch_false (L_loop_long)); + add_int_reg_note (jump, REG_BR_PROB, prob_likely); + /* end loop. */ - emit_insn (gen_extendqisi2 (tmp2, addr2)); - emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1)); + emit_label (L_end_loop_long); - emit_insn (gen_extendqisi2 (tmp1, addr1)); - emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1)); + emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4)); - emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx)); - jump = emit_jump_insn (gen_branch_true (L_end_loop_byte)); - add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); + /* start byte loop. */ + addr1 = adjust_address (addr1, QImode, 0); - emit_insn (gen_cmpeqsi_t (tmp1, tmp2)); - if (flag_delayed_branch) - emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2))); - jump = emit_jump_insn (gen_branch_false (L_end_loop_byte)); - add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); + emit_label (L_loop_byte); - if (TARGET_SH2) - emit_insn (gen_dect (len, len)); - else - { - emit_insn (gen_addsi3 (len, len, GEN_INT (-1))); - emit_insn (gen_tstsi_t (len, len)); - } + emit_insn (gen_extendqisi2 (tmp1, addr1)); + emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1)); - jump = emit_jump_insn (gen_branch_false (L_loop_byte)); - add_int_reg_note (jump, REG_BR_PROB, prob_likely); - /* end byte loop. */ + emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx)); + jump = emit_jump_insn (gen_branch_false (L_loop_byte)); + add_int_reg_note (jump, REG_BR_PROB, prob_likely); - emit_label (L_end_loop_byte); + /* end loop. */ - if (! flag_delayed_branch) - emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2))); - emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1))); + emit_label (L_return); - emit_label (L_return); + emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1))); - emit_insn (gen_subsi3 (operands[0], tmp1, tmp2)); + emit_insn (gen_subsi3 (operands[0], current_addr, start_addr)); - return true; + return true; } diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h index e7dfce366abc..11877e58b522 100644 --- a/gcc/config/sh/sh-protos.h +++ b/gcc/config/sh/sh-protos.h @@ -118,6 +118,7 @@ extern bool expand_block_move (rtx *); extern void prepare_move_operands (rtx[], enum machine_mode mode); extern bool sh_expand_cmpstr (rtx *); extern bool sh_expand_cmpnstr (rtx *); +extern bool sh_expand_strlen (rtx *); extern enum rtx_code prepare_cbranch_operands (rtx *, enum machine_mode mode, enum rtx_code comparison); extern void expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int); diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md index 364226b5c7e2..1dd288c48331 100644 --- a/gcc/config/sh/sh.md +++ b/gcc/config/sh/sh.md @@ -161,6 +161,9 @@ ;; (unspec [OFFSET ANCHOR] UNSPEC_PCREL_SYMOFF) == OFFSET - (ANCHOR - .). (UNSPEC_PCREL_SYMOFF 46) + ;; Misc builtins + (UNSPEC_BUILTIN_STRLEN 47) + ;; These are used with unspec_volatile. (UNSPECV_BLOCKAGE 0) (UNSPECV_ALIGN 1) @@ -12059,7 +12062,7 @@ label: (compare:SI (match_operand:BLK 1 "memory_operand") (match_operand:BLK 2 "memory_operand"))) (use (match_operand 3 "immediate_operand"))] - "TARGET_SH1" + "TARGET_SH1 && optimize" { if (! optimize_insn_for_size_p () && sh_expand_cmpstr (operands)) DONE; @@ -12073,7 +12076,7 @@ label: (match_operand:BLK 2 "memory_operand"))) (use (match_operand:SI 3 "immediate_operand")) (use (match_operand:SI 4 "immediate_operand"))] - "TARGET_SH1" + "TARGET_SH1 && optimize" { if (! optimize_insn_for_size_p () && sh_expand_cmpnstr (operands)) DONE; @@ -12081,6 +12084,20 @@ label: FAIL; }) +(define_expand "strlensi" + [(set (match_operand:SI 0 "register_operand") + (unspec:SI [(match_operand:BLK 1 "memory_operand") + (match_operand:SI 2 "immediate_operand") + (match_operand:SI 3 "immediate_operand")] + UNSPEC_BUILTIN_STRLEN))] + "TARGET_SH1 && optimize" +{ + if (! optimize_insn_for_size_p () && sh_expand_strlen (operands)) + DONE; + else + FAIL; +}) + ;; ------------------------------------------------------------------------- ;; Floating point instructions. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ad1d4db8373a..8460edb8d0a0 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2013-11-06 Christian Bruel + + * gcc.target/sh/strlen.c: New test. + 2013-11-06 Jakub Jelinek PR middle-end/58970 diff --git a/gcc/testsuite/gcc.target/sh/strlen.c b/gcc/testsuite/gcc.target/sh/strlen.c new file mode 100644 index 000000000000..8a99781f3b44 --- /dev/null +++ b/gcc/testsuite/gcc.target/sh/strlen.c @@ -0,0 +1,19 @@ +/* Check that the __builtin_strlen function is inlined with cmp/str + when optimizing for speed. */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O2" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m5*" } { "" } } */ +/* { dg-final { scan-assembler-not "jmp" } } */ +/* { dg-final { scan-assembler-times "cmp/str" 2 } } */ +/* { dg-final { scan-assembler-times "tst\t#3" 1 } } */ + +test00 (const char *s1) +{ + return __builtin_strlen (s1); +} + +/* Check that no test for alignment is needed. */ +test03(const char *s1) +{ + return __builtin_strlen (__builtin_assume_aligned (s1, 4)); +}