FAIL;
})
+;; Inlining a general memmove is a pessimisation: we can't avoid having to
+;; decide at runtime which direction to copy, which can be costly.  Until we
+;; can benchmark implementations on real V hardware, take the conservative
+;; approach of inlining only those cases that can be performed with a single
+;; vector load + store.  For tiny moves, fall back to scalar.
+(define_expand "movmem<mode>"
+  [(parallel [(set (match_operand:BLK 0 "general_operand")
+                   (match_operand:BLK 1 "general_operand"))
+              (use (match_operand:P 2 "const_int_operand"))
+              (use (match_operand:SI 3 "const_int_operand"))])]
+  "TARGET_VECTOR"
+{
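+  /* TARGET_MIN_VLEN is measured in bits, so these bounds cover sizes from
+     one minimum-width vector register's worth of bytes up to eight
+     registers' worth (LMUL=8), i.e. what a single vector load/store pair
+     can handle.  */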
+  if (CONST_INT_P (operands[2])
+      && INTVAL (operands[2]) >= TARGET_MIN_VLEN / 8
+      && INTVAL (operands[2]) <= TARGET_MIN_VLEN
+      && riscv_vector::expand_block_move (operands[0],
+                                          operands[1],
+                                          operands[2]))
+    DONE;
+  else
+    FAIL;
+})
+
;; Expand in-line code to clear the instruction cache between operand[0] and
;; operand[1].
(define_expand "clear_cache"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-add-options riscv_v } */
+/* { dg-additional-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <string.h>
+
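+/* __riscv_v_min_vlen is given in bits; MIN_VECTOR_BYTES is the capacity in
+   bytes of one minimum-width vector register.  */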
+#define MIN_VECTOR_BYTES (__riscv_v_min_vlen/8)
+
+/* tiny memmoves should not be vectorised
+** f1:
+** li\s+a2,15
+** tail\s+memmove
+*/
+char * f1 (char *a, char const *b)
+{
+  return memmove (a, b, 15);
+}
+
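+/* The AVL immediate of vsetivli is only 5 bits wide, so targets whose
+   MIN_VECTOR_BYTES exceeds 31 must use the li + vsetvli form instead;
+   the body check for f2 accepts either.  */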
+/* vectorise+inline minimum vector register width with LMUL=1
+** f2:
+** (
+** vsetivli\s+zero,16,e8,m1,ta,ma
+** |
+** li\s+[ta][0-7],\d+
+** vsetvli\s+zero,[ta][0-7],e8,m1,ta,ma
+** )
+** vle8\.v\s+v\d+,0\(a1\)
+** vse8\.v\s+v\d+,0\(a0\)
+** ret
+*/
+char * f2 (char *a, char const *b)
+{
+  return memmove (a, b, MIN_VECTOR_BYTES);
+}
+
+/* vectorise+inline up to LMUL=8
+** f3:
+** li\s+[ta][0-7],\d+
+** vsetvli\s+zero,[ta][0-7],e8,m8,ta,ma
+** vle8\.v\s+v\d+,0\(a1\)
+** vse8\.v\s+v\d+,0\(a0\)
+** ret
+*/
+char * f3 (char *a, char const *b)
+{
+  return memmove (a, b, MIN_VECTOR_BYTES*8);
+}
+
+/* don't vectorise if the move is too large for one operation
+** f4:
+** li\s+a2,\d+
+** tail\s+memmove
+*/
+char * f4 (char *a, char const *b)
+{
+  return memmove (a, b, MIN_VECTOR_BYTES*8+1);
+}
+