;;
riscv*)
cpu_type=riscv
- extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-selftests.o riscv-v.o riscv-vsetvl.o riscv-vector-costs.o"
+ extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-selftests.o riscv-string.o"
+ extra_objs="${extra_objs} riscv-v.o riscv-vsetvl.o riscv-vector-costs.o"
extra_objs="${extra_objs} riscv-vector-builtins.o riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o"
extra_objs="${extra_objs} thead.o"
d_target_objs="riscv-d.o"
/* Mask that selects the riscv_builtin_class part of a function code. */
const unsigned int RISCV_BUILTIN_CLASS = (1 << RISCV_BUILTIN_SHIFT) - 1;
+/* Routines implemented in riscv-string.cc. */
+extern bool riscv_expand_strlen (rtx, rtx, rtx, rtx);
+
/* Routines implemented in thead.cc. */
extern bool th_mempair_operands_p (rtx[4], bool, machine_mode);
extern void th_mempair_order_operands (rtx[4], bool, machine_mode);
--- /dev/null
+/* Subroutines used to expand string operations for RISC-V.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#define IN_TARGET_CODE 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "rtl.h"
+#include "tree.h"
+#include "memmodel.h"
+#include "tm_p.h"
+#include "ira.h"
+#include "print-tree.h"
+#include "varasm.h"
+#include "explow.h"
+#include "expr.h"
+#include "output.h"
+#include "target.h"
+#include "predict.h"
+#include "optabs.h"
+
+/* Define do_<NAME>2 (DEST, SRC).  The generated function emits the
+   DImode variant of the two-operand pattern NAME if DEST is DImode and
+   the SImode variant otherwise, and returns the emitted insn.  */
+
+#define GEN_EMIT_HELPER2(name)                          \
+static rtx_insn *                                       \
+do_## name ## 2(rtx dest, rtx src)                      \
+{                                                       \
+  rtx pat = (GET_MODE (dest) == DImode                  \
+             ? gen_ ## name ## di2 (dest, src)          \
+             : gen_ ## name ## si2 (dest, src));        \
+  return emit_insn (pat);                               \
+}
+
+/* Define do_<NAME>3 (DEST, SRC1, SRC2).  The generated function emits
+   the DImode variant of the three-operand pattern NAME if DEST is
+   DImode and the SImode variant otherwise, and returns the emitted
+   insn.  */
+
+#define GEN_EMIT_HELPER3(name)                          \
+static rtx_insn *                                       \
+do_## name ## 3(rtx dest, rtx src1, rtx src2)           \
+{                                                       \
+  rtx pat = (GET_MODE (dest) == DImode                  \
+             ? gen_ ## name ## di3 (dest, src1, src2)   \
+             : gen_ ## name ## si3 (dest, src1, src2)); \
+  return emit_insn (pat);                               \
+}
+
+/* Instantiate the mode-dispatching emit helpers that the expanders in
+   this file call.  The trailing comment names the generated function.  */
+GEN_EMIT_HELPER3(add) /* do_add3 */
+GEN_EMIT_HELPER2(clz) /* do_clz2 */
+GEN_EMIT_HELPER2(ctz) /* do_ctz2 */
+GEN_EMIT_HELPER3(lshr) /* do_lshr3 */
+GEN_EMIT_HELPER2(orcb) /* do_orcb2 */
+GEN_EMIT_HELPER2(one_cmpl) /* do_one_cmpl2 */
+GEN_EMIT_HELPER3(sub) /* do_sub3 */
+GEN_EMIT_HELPER2(th_rev) /* do_th_rev2 */
+GEN_EMIT_HELPER2(th_tstnbz) /* do_th_tstnbz2 */
+GEN_EMIT_HELPER2(zero_extendqi) /* do_zero_extendqi2 */
+
+/* The generator macros are not needed beyond this point.  */
+#undef GEN_EMIT_HELPER2
+#undef GEN_EMIT_HELPER3
+
+/* Load a byte or an Xmode word from memory.
+
+   MODE is the mode of the load; only QImode and Xmode are handled.
+   DEST is the destination register.  A QImode load is zero-extended
+   into it.
+   ADDR_REG is the register holding the address to load from.
+   ADDR supplies the memory attributes that are copied to the new MEM.
+
+   Return ADDR_REG.  */
+
+static rtx
+do_load_from_addr (machine_mode mode, rtx dest, rtx addr_reg, rtx addr)
+{
+  rtx mem = gen_rtx_MEM (mode, addr_reg);
+  MEM_COPY_ATTRIBUTES (mem, addr);
+  set_mem_size (mem, GET_MODE_SIZE (mode));
+
+  if (mode == QImode)
+    {
+      do_zero_extendqi2 (dest, mem);
+      return addr_reg;
+    }
+
+  /* Only byte and full-register loads are supported.  */
+  if (mode != Xmode)
+    gcc_unreachable ();
+
+  emit_move_insn (dest, mem);
+  return addr_reg;
+}
+
+/* Expand strlen inline for a string whose alignment is known.
+
+ RESULT receives the computed length.
+ SRC is the MEM for the string; its address is copied into a register.
+ ALIGN is the alignment operand.  It must be a CONST_INT that is at
+ least the size of Xmode, otherwise nothing is emitted.
+
+ The emitted code loads one Xmode word per loop iteration and uses
+ orc.b (Zbb) or th.tstnbz (XTheadBb) to find a NUL byte in it.
+
+ Return true if the sequence was emitted and false otherwise. */
+
+static bool
+riscv_expand_strlen_scalar (rtx result, rtx src, rtx align)
+{
+ rtx testval, addr, addr_plus_regsz, word, zeros;
+ rtx loop_label, cond;
+
+ gcc_assert (TARGET_ZBB || TARGET_XTHEADBB);
+
+ /* The alignment needs to be known and big enough. */
+ if (!CONST_INT_P (align) || UINTVAL (align) < GET_MODE_SIZE (Xmode))
+ return false;
+
+ testval = gen_reg_rtx (Xmode);
+ addr = copy_addr_to_reg (XEXP (src, 0));
+ addr_plus_regsz = gen_reg_rtx (Pmode);
+ word = gen_reg_rtx (Xmode);
+ zeros = gen_reg_rtx (Xmode);
+
+ /* TESTVAL is the value the transformed word is compared with in the
+    loop: all-ones for Zbb, zero for XTheadBb.  The loop repeats while
+    the two are equal. */
+ if (TARGET_ZBB)
+ emit_insn (gen_rtx_SET (testval, constm1_rtx));
+ else
+ emit_insn (gen_rtx_SET (testval, const0_rtx));
+
+ /* Remember the start address plus one word.  ADDR is advanced by a
+    whole word after every load, so this is the value that has to be
+    subtracted at the end. */
+ do_add3 (addr_plus_regsz, addr, GEN_INT (UNITS_PER_WORD));
+
+ loop_label = gen_label_rtx ();
+ emit_label (loop_label);
+
+ /* Load a word and use orc.b/th.tstnbz to find a zero-byte. */
+ do_load_from_addr (Xmode, word, addr, src);
+ do_add3 (addr, addr, GEN_INT (UNITS_PER_WORD));
+ if (TARGET_ZBB)
+ do_orcb2 (word, word);
+ else
+ do_th_tstnbz2 (word, word);
+ /* Branch back to the loop head while WORD equals TESTVAL.
+    NOTE(review): the back edge is annotated as very unlikely; confirm
+    that this is the intended prediction for strings longer than one
+    word. */
+ cond = gen_rtx_EQ (VOIDmode, word, testval);
+ emit_unlikely_jump_insn (gen_cbranch4 (Xmode, cond, word, testval, loop_label));
+
+ /* Calculate the return value by counting zero-bits.  With Zbb the
+    word is complemented first.  Big-endian counts leading zeros;
+    little-endian counts trailing zeros with Zbb and otherwise
+    byte-reverses the word and counts leading zeros. */
+ if (TARGET_ZBB)
+ do_one_cmpl2 (word, word);
+ if (TARGET_BIG_ENDIAN)
+ do_clz2 (zeros, word);
+ else if (TARGET_ZBB)
+ do_ctz2 (zeros, word);
+ else
+ {
+ do_th_rev2 (word, word);
+ do_clz2 (zeros, word);
+ }
+
+ /* Convert the bit count into a byte count, add it to ADDR (which is
+    one word past the last loaded word) and subtract the start address
+    plus one word to obtain the length. */
+ do_lshr3 (zeros, zeros, GEN_INT (exact_log2 (BITS_PER_UNIT)));
+ do_add3 (addr, addr, zeros);
+ do_sub3 (result, addr, addr_plus_regsz);
+
+ return true;
+}
+
+/* Try to expand a strlen operation inline.
+
+   RESULT, SRC and ALIGN are handed to the scalar expander.
+   SEARCH_CHAR must be const0_rtx.
+
+   Return true if code was emitted.  Return false if the caller should
+   fall back to the generic code, probably a call to strlen.  */
+
+bool
+riscv_expand_strlen (rtx result, rtx src, rtx search_char, rtx align)
+{
+  gcc_assert (search_char == const0_rtx);
+
+  /* Without Zbb or XTheadBb there is nothing to expand to.  */
+  if (!TARGET_ZBB && !TARGET_XTHEADBB)
+    return false;
+
+  return riscv_expand_strlen_scalar (result, src, align);
+}
;; the calling convention of callee
UNSPEC_CALLEE_CC
+
+ ;; String unspecs
+ UNSPEC_STRLEN
])
(define_c_enum "unspecv" [
"TARGET_XTHEADMAC"
)
+;; Search for a character in a string (generalization of strlen).
+;; Operand 0 is the resulting offset
+;; Operand 1 is the string
+;; Operand 2 is the search character; only zero is expanded
+;; Operand 3 is the alignment
+
+(define_expand "strlen<mode>"
+  [(set (match_operand:X 0 "register_operand")
+        (unspec:X [(match_operand:BLK 1 "general_operand")
+                   (match_operand:SI 2 "const_int_operand")
+                   (match_operand:SI 3 "const_int_operand")]
+                  UNSPEC_STRLEN))]
+  "riscv_inline_strlen && !optimize_size && (TARGET_ZBB || TARGET_XTHEADBB)"
+{
+  /* Searching for anything but the terminating NUL is not handled.  */
+  if (operands[2] != const0_rtx)
+    FAIL;
+
+  if (!riscv_expand_strlen (operands[0], operands[1],
+                            operands[2], operands[3]))
+    FAIL;
+
+  DONE;
+})
+
(include "bitmanip.md")
(include "crypto.md")
(include "sync.md")
Target Var(TARGET_INLINE_SUBWORD_ATOMIC) Init(1)
Always inline subword atomic operations.
+minline-strlen
+Target Bool Var(riscv_inline_strlen) Init(0)
+Inline strlen calls if possible.
+
Enum
Name(riscv_autovec_preference) Type(enum riscv_autovec_preference_enum)
Valid arguments to -param=riscv-autovec-preference=:
$(COMPILE) $<
$(POSTCOMPILE)
+riscv-string.o: $(srcdir)/config/riscv/riscv-string.cc \
+ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TARGET_H) backend.h $(RTL_H) \
+ memmodel.h $(EMIT_RTL_H) poly-int.h output.h
+ $(COMPILE) $<
+ $(POSTCOMPILE)
+
riscv-v.o: $(srcdir)/config/riscv/riscv-v.cc \
$(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \
$(TM_P_H) $(TARGET_H) memmodel.h insn-codes.h $(OPTABS_H) $(RECOG_H) \
[(set_attr "type" "bitmanip")
(set_attr "mode" "<X:MODE>")])
-(define_insn "*th_rev<mode>2"
+(define_insn "th_rev<mode>2"
[(set (match_operand:GPR 0 "register_operand" "=r")
(bswap:GPR (match_operand:GPR 1 "register_operand" "r")))]
"TARGET_XTHEADBB && (TARGET_64BIT || <MODE>mode == SImode)"
[(set_attr "type" "bitmanip")
(set_attr "mode" "<GPR:MODE>")])
+;; th.tstnbz.  The pattern is named so that gen_th_tstnbz<mode>2 is
+;; available to the inline strlen expansion.  The "mode" attribute is
+;; set like on the neighbouring XTheadBb patterns.
+(define_insn "th_tstnbz<mode>2"
+  [(set (match_operand:X 0 "register_operand" "=r")
+        (unspec:X [(match_operand:X 1 "register_operand" "r")] UNSPEC_ORC_B))]
+  "TARGET_XTHEADBB"
+  "th.tstnbz\t%0,%1"
+  [(set_attr "type" "bitmanip")
+   (set_attr "mode" "<X:MODE>")])
+
;; XTheadBs
(define_insn "*th_tst<mode>3"
-mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{reg}
-mstack-protector-guard-offset=@var{offset}
-mcsr-check -mno-csr-check
--minline-atomics -mno-inline-atomics}
+-minline-atomics -mno-inline-atomics
+-minline-strlen -mno-inline-strlen}
@emph{RL78 Options}
@gccoptlist{-msim -mmul=none -mmul=g13 -mmul=g14 -mallregs
libatomic function calls. The default is to use fast inline subword atomics
that do not require libatomic.
+@opindex minline-strlen
+@item -minline-strlen
+@itemx -mno-inline-strlen
+Do or do not attempt to inline @code{strlen} calls.
+Inlining is only done if the string is known to be suitably aligned
+and instructions for accelerated processing are available.
+The default is to not inline @code{strlen} calls.
+
@opindex mshorten-memrefs
@item -mshorten-memrefs
@itemx -mno-shorten-memrefs
return last;
}
+/* Emit pattern X as a JUMP_INSN at the end of the doubly-linked list
+   and attach a REG_BR_PROB note that marks the jump as very likely.
+   Return the emitted insn.  */
+
+rtx_insn *
+emit_likely_jump_insn (rtx x)
+{
+  rtx_insn *insn = emit_jump_insn (x);
+  const profile_probability prob = profile_probability::very_likely ();
+  add_reg_br_prob_note (insn, prob);
+  return insn;
+}
+
+/* Emit pattern X as a JUMP_INSN at the end of the doubly-linked list
+   and attach a REG_BR_PROB note that marks the jump as very unlikely.
+   Return the emitted insn.  */
+
+rtx_insn *
+emit_unlikely_jump_insn (rtx x)
+{
+  rtx_insn *insn = emit_jump_insn (x);
+  const profile_probability prob = profile_probability::very_unlikely ();
+  add_reg_br_prob_note (insn, prob);
+  return insn;
+}
+
/* Make an insn of code CALL_INSN with pattern X
and add it to the end of the doubly-linked list. */
extern rtx_insn *emit_insn (rtx);
extern rtx_insn *emit_debug_insn (rtx);
extern rtx_insn *emit_jump_insn (rtx);
+extern rtx_insn *emit_likely_jump_insn (rtx);
+extern rtx_insn *emit_unlikely_jump_insn (rtx);
extern rtx_insn *emit_call_insn (rtx);
extern rtx_code_label *emit_label (rtx);
extern rtx_jump_table_data *emit_jump_table_data (rtx);
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-minline-strlen -march=rv32gc_xtheadbb" { target { rv32 } } } */
+/* { dg-options "-minline-strlen -march=rv64gc_xtheadbb" { target { rv64 } } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" "-Oz" } } */
+
+typedef long unsigned int size_t;
+
+/* No alignment information is given for S, so with XTheadBb the call
+   is expected not to be expanded to a th.tstnbz sequence.  */
+size_t
+my_str_len (const char *s)
+{
+ return __builtin_strlen (s);
+}
+
+/* { dg-final { scan-assembler-not "th.tstnbz\t" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-minline-strlen -march=rv32gc_xtheadbb" { target { rv32 } } } */
+/* { dg-options "-minline-strlen -march=rv64gc_xtheadbb" { target { rv64 } } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" "-Oz" } } */
+
+typedef long unsigned int size_t;
+
+/* S is declared to be 4096-byte aligned, so with -minline-strlen and
+   XTheadBb the call is expected to be expanded inline using
+   th.tstnbz.  */
+size_t
+my_str_len (const char *s)
+{
+ s = __builtin_assume_aligned (s, 4096);
+ return __builtin_strlen (s);
+}
+
+/* { dg-final { scan-assembler "th.tstnbz\t" } } */
+/* { dg-final { scan-assembler-not "jalr" } } */
+/* { dg-final { scan-assembler-not "call" } } */
+/* { dg-final { scan-assembler-not "jr" } } */
+/* { dg-final { scan-assembler-not "tail" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zbb" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc_zbb" { target { rv64 } } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" "-Oz" } } */
+
+typedef long unsigned int size_t;
+
+/* -minline-strlen is not passed, so even for an aligned S no orc.b
+   sequence is expected with Zbb.  */
+size_t
+my_str_len (const char *s)
+{
+ s = __builtin_assume_aligned (s, 4096);
+ return __builtin_strlen (s);
+}
+
+/* { dg-final { scan-assembler-not "orc.b\t" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mno-inline-strlen -march=rv32gc_zbb" { target { rv32 } } } */
+/* { dg-options "-mno-inline-strlen -march=rv64gc_zbb" { target { rv64 } } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" "-Oz" } } */
+
+typedef long unsigned int size_t;
+
+/* Inlining is disabled explicitly with -mno-inline-strlen, so even for
+   an aligned S no orc.b sequence is expected with Zbb.  */
+size_t
+my_str_len (const char *s)
+{
+ s = __builtin_assume_aligned (s, 4096);
+ return __builtin_strlen (s);
+}
+
+/* { dg-final { scan-assembler-not "orc.b\t" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-minline-strlen -march=rv32gc_zbb" { target { rv32 } } } */
+/* { dg-options "-minline-strlen -march=rv64gc_zbb" { target { rv64 } } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" "-Oz" } } */
+
+typedef long unsigned int size_t;
+
+/* No alignment information is given for S, so with Zbb the call is
+   expected not to be expanded to an orc.b sequence.  */
+size_t
+my_str_len (const char *s)
+{
+ return __builtin_strlen (s);
+}
+
+/* { dg-final { scan-assembler-not "orc.b\t" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-minline-strlen -march=rv32gc_zbb" { target { rv32 } } } */
+/* { dg-options "-minline-strlen -march=rv64gc_zbb" { target { rv64 } } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" "-Oz" } } */
+
+typedef long unsigned int size_t;
+
+/* S is declared to be 4096-byte aligned, so with -minline-strlen and
+   Zbb the call is expected to be expanded inline using orc.b.  */
+size_t
+my_str_len (const char *s)
+{
+ s = __builtin_assume_aligned (s, 4096);
+ return __builtin_strlen (s);
+}
+
+/* { dg-final { scan-assembler "orc.b\t" } } */
+/* { dg-final { scan-assembler-not "jalr" } } */
+/* { dg-final { scan-assembler-not "call" } } */
+/* { dg-final { scan-assembler-not "jr" } } */
+/* { dg-final { scan-assembler-not "tail" } } */