--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (void * restrict in, void * restrict out, int n, int vl)
+{ /* Copy loop: each of the n iterations loads an int8 mf8 vector from
+     in + i and stores it to out + i, both with the caller-supplied vl.
+     The dg-final scans after this function expect exactly one vsetvli
+     (e8, mf8) at the option sets they cover.  */
+ for (int i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {\.L[0-9]+\:\s+vle8\.v\s+v[0-9]+,\s*0\s*\([a-x0-9]+\)} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (void * restrict in, void * restrict out, int n)
+{ /* Same constant vl (39) drives two load/store pairs per iteration that
+     differ only in LMUL: mf8 at offset i, then mf2 at offset i + 100.
+     The dg-final scans after this function expect one mf8 vsetvli, one
+     mf2 vsetvli and two vsetvli in total.  */
+ size_t vl = 39;
+ for (int i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+
+ vint8mf2_t v2 = __riscv_vle8_v_i8mf2 (in + i + 100, vl);
+ __riscv_vse8_v_i8mf2 (out + i + 100, v2, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (void * restrict in, void * restrict out, int n)
+{ /* Constant vl (39); each iteration does a plain mf8 load/store followed
+     by a _tu (tail-undisturbed) load that merges into v and is stored at
+     offset i + 100.  The dg-final scans after this function expect a
+     single vsetvli, and that it carries the tu policy.  */
+ size_t vl = 39;
+ for (int i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl); /* v is the merge operand.  */
+ __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {\.L[0-9]+\:\s+vle8\.v\s+v[0-9]+,\s*0\s*\([a-x0-9]+\)} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int cond)
+{ /* Straight-line code, executed only when cond == 2: a plain mf8 copy
+     with vl 101, then a _tu load/store with vl 102.  The dg-final scans
+     after this function expect two vsetvli, exactly one of them with the
+     tu policy.  n is unused.  */
+ if (cond == 2) {
+ size_t vl = 101;
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + 900, vl);
+ __riscv_vse8_v_i8mf8 (out + 900, v, vl);
+ vl = 102; /* Different AVL for the second pair.  */
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + 1000, vl);
+ __riscv_vse8_v_i8mf8 (out + 1000, v2, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int cond)
+{ /* Two consecutive loops share vl 101; each does a plain mf8 load/store
+     and a _tu load/store (at different offsets).  The dg-final scans after
+     this function expect one vsetvli overall, with the tu policy.
+     cond is unused.  */
+ size_t vl = 101;
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
+ }
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 300, v, vl);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 200, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 200, v2, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int cond)
+{ /* Two loops with the same vl (101): the first mixes a plain and a _tu
+     load/store, the second has only a plain load/store.  The dg-final
+     scans after this function expect one vsetvli overall, with the tu
+     policy.  cond is unused.  */
+ size_t vl = 101;
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
+ }
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 300, v, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int cond)
+{ /* Two loops with the same vl (101), in the opposite order to a
+     plain-after-tu arrangement: the plain-only loop runs first, the loop
+     containing the _tu load second.  The dg-final scans after this
+     function expect one vsetvli overall, with the tu policy.
+     cond is unused.  */
+ size_t vl = 101;
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 300, v, vl);
+ }
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
+{ /* Outer loop over j contains two inner loops sharing vl 101 and one
+     mask: the first works on int8 mf8 (plain copy plus masked _tumu
+     load/store), the second on float32 mf2 (same pattern).  The dg-final
+     scans after this function expect two vsetvli in total, one of them
+     e8, mf8, tu, mu.  cond is unused.  */
+ size_t vl = 101;
+ vbool64_t mask = *(vbool64_t*) (in + 1000000); /* Mask is read from memory, not computed.  */
+ for (size_t j = 0; j < m; j++){
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in + i + j + 100, vl);
+ __riscv_vse8_v_i8mf8_m (mask, out + i + j + 100, v2, vl);
+ }
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + j + 200), vl);
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + j + 200), v, vl);
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + j + 300), vl);
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + j + 300), v2, vl);
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
+{ /* Outer loop over j with two inner loops sharing vl 101: a plain int8
+     mf8 copy, then a float32 mf2 loop with a plain copy plus a masked
+     _tumu load/store.  The dg-final scans after this function expect two
+     vsetvli in total, one of them e32, mf2, tu, mu.  cond is unused.  */
+ size_t vl = 101;
+ vbool64_t mask = *(vbool64_t*) (in + 1000000); /* Only the float32 loop uses the mask.  */
+ for (size_t j = 0; j < m; j++){
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+ }
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + j + 200), vl);
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + j + 200), v, vl);
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + j + 300), vl);
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + j + 300), v2, vl);
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
+{ /* Outer loop over j with two inner loops sharing vl 101: an int8 mf8
+     loop with a plain copy plus a masked _tumu load/store, then a plain
+     float32 mf2 copy.  The dg-final scans after this function expect two
+     vsetvli in total, one of them e8, mf8, tu, mu.  cond is unused.  */
+ size_t vl = 101;
+ vbool64_t mask = *(vbool64_t*) (in + 1000000); /* Only the int8 loop uses the mask.  */
+ for (size_t j = 0; j < m; j++){
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in + i + j + 100, vl);
+ __riscv_vse8_v_i8mf8_m (mask, out + i + j + 100, v2, vl);
+ }
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + j + 200), vl);
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + j + 200), v, vl);
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
+{ /* Per outer iteration: a plain int8 mf8 copy loop, five scalar loops
+     that rewrite out[] in place, then a float32 mf2 loop with a plain copy
+     plus a masked _tumu load/store; all vector code uses vl 101.  The
+     dg-final scans after this function expect two vsetvli in total, one of
+     them e32, mf2, tu, mu.  cond is unused.  */
+ size_t vl = 101;
+ vbool64_t mask = *(vbool64_t*) (in + 1000000);
+ for (size_t j = 0; j < m; j++){
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+ }
+
+ for (size_t i = 0; i < n * n; i++) /* Scalar loops: no intrinsics between the two vector loops.  */
+ out[i] = out[i] * out[i];
+ for (size_t i = 0; i < n * n * n; i++)
+ out[i] = out[i] + out[i];
+ for (size_t i = 0; i < n * n * n * n; i++)
+ out[i] = out[i] + 2;
+ for (size_t i = 0; i < n * n * n * n * n; i++)
+ out[i] = out[i] * 100;
+ for (size_t i = 0; i < n * n * n * n * n * n; i++)
+ out[i] = out[i] - 77;
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + j + 200), vl);
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + j + 200), v, vl);
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + j + 300), vl);
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + j + 300), v2, vl);
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (void * restrict in, void * restrict out, int n, size_t vl)
+{ /* Copy loop whose AVL is the expression vl << 3, written out separately
+     at the load and at the store.  The dg-final scans after this function
+     expect one slli and one vsetvli.  */
+ for (int i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl << 3);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl << 3);
+ }
+}
+
+/* { dg-final { scan-assembler-times {\.L[0-9]+\:\s+vle8\.v\s+v[0-9]+,\s*0\s*\([a-x0-9]+\)} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {slli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
+{ /* Per outer iteration: a plain int8 mf8 copy loop bounded by cond
+     (not n), five scalar loops that rewrite out[] in place, then a float32
+     mf2 loop with a plain copy plus a masked _tumu load/store; all vector
+     code uses vl 101.  The dg-final scans after this function expect two
+     vsetvli in total, one of them e32, mf2, tu, mu.  */
+ size_t vl = 101;
+ vbool64_t mask = *(vbool64_t*) (in + 1000000);
+ for (size_t j = 0; j < m; j++){
+ for (size_t i = 0; i < cond; i++) /* Trip count comes from cond here.  */
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+ }
+
+ for (size_t i = 0; i < n * n; i++)
+ out[i] = out[i] * out[i];
+ for (size_t i = 0; i < n * n * n; i++)
+ out[i] = out[i] + out[i];
+ for (size_t i = 0; i < n * n * n * n; i++)
+ out[i] = out[i] + 2;
+ for (size_t i = 0; i < n * n * n * n * n; i++)
+ out[i] = out[i] * 100;
+ for (size_t i = 0; i < n * n * n * n * n * n; i++)
+ out[i] = out[i] - 77;
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + j + 200), vl);
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + j + 200), v, vl);
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + j + 300), vl);
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + j + 300), v2, vl);
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
+{ /* Inside the outer j loop, cond selects one of two inner loops: a plain
+     int8 mf8 copy, or a float32 mf2 loop with a plain copy plus a masked
+     _tumu load/store; both use vl 101.  The dg-final scans after this
+     function expect two vsetvli in total, one of them e32, mf2, tu, mu.  */
+ size_t vl = 101;
+ vbool64_t mask = *(vbool64_t*) (in + 1000000);
+ for (size_t j = 0; j < m; j++){
+
+ if (cond) {
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+ }
+ } else {
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + j + 200), vl);
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + j + 200), v, vl);
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + j + 300), vl);
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + j + 300), v2, vl);
+ }
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
+{ /* Like the scalar-loops-in-between layout, but vl is declared inside the
+     outer loop and assigned 101 twice: once before the int8 mf8 copy loop
+     and again before the float32 mf2 loop.  The dg-final scans after this
+     function expect two vsetvli in total, one of them e32, mf2, tu, mu.
+     cond is unused.  */
+ vbool64_t mask = *(vbool64_t*) (in + 1000000);
+ for (size_t j = 0; j < m; j++){
+
+ size_t vl = 101;
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+ }
+
+ for (size_t i = 0; i < n * n; i++)
+ out[i] = out[i] * out[i];
+ for (size_t i = 0; i < n * n * n; i++)
+ out[i] = out[i] + out[i];
+ for (size_t i = 0; i < n * n * n * n; i++)
+ out[i] = out[i] + 2;
+ for (size_t i = 0; i < n * n * n * n * n; i++)
+ out[i] = out[i] * 100;
+ for (size_t i = 0; i < n * n * n * n * n * n; i++)
+ out[i] = out[i] - 77;
+
+ vl = 101; /* Re-assigned to the value it already holds.  */
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + j + 200), vl);
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + j + 200), v, vl);
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + j + 300), vl);
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + j + 300), v2, vl);
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
+{ /* Inside the outer j loop the two inner loops use different AVLs: 101
+     for the plain int8 mf8 copy, 102 for the float32 mf2 loop (plain copy
+     plus masked _tumu load/store).  The dg-final scans after this function
+     expect four vsetvli in total and exactly one li of each constant.
+     cond is unused.  */
+ vbool64_t mask = *(vbool64_t*) (in + 1000000);
+ for (size_t j = 0; j < m; j++){
+
+ size_t vl = 101;
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+ }
+
+ vl = 102; /* AVL changes between the two inner loops.  */
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + j + 200), vl);
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + j + 200), v, vl);
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + j + 300), vl);
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + j + 300), v2, vl);
+ }
+ }
+}
+
+/* { dg-final { scan-assembler {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*tu,\s*mu} { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 4 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {li\s+[a-x0-9]+,101} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {li\s+[a-x0-9]+,102} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
+{ /* Inside the outer j loop: a plain int8 mf8 copy with vl 101, a scalar
+     loop bounded by cond, then a float32 mf2 loop (plain copy plus masked
+     _tumu load/store) with vl 102.  The dg-final scans after this function
+     require both vsetvli forms to appear and exactly one li of each
+     constant; they do not pin the total vsetvli count.  */
+ vbool64_t mask = *(vbool64_t*) (in + 1000000);
+ for (size_t j = 0; j < m; j++){
+
+ size_t vl = 101;
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+ }
+
+ for (size_t i = 0; i < cond; i++) /* Scalar work separating the two vector loops.  */
+ out[i] = out[i] * out[i];
+
+ vl = 102;
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + j + 200), vl);
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + j + 200), v, vl);
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + j + 300), vl);
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + j + 300), v2, vl);
+ }
+ }
+}
+
+/* { dg-final { scan-assembler {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*tu,\s*mu} { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {li\s+[a-x0-9]+,101} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {li\s+[a-x0-9]+,102} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
+{ /* Inside the outer j loop, both arms of the cond branch run a plain int8
+     mf8 copy loop (at different offsets); they join into a float32 mf2
+     loop with a plain copy plus a masked _tumu load/store.  All use vl 101.
+     The dg-final scans after this function expect two vsetvli in total,
+     one of them e32, mf2, tu, mu.  */
+ vbool64_t mask = *(vbool64_t*) (in + 1000000);
+ size_t vl = 101;
+ for (size_t j = 0; j < m; j++){
+
+ if (cond) {
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+ }
+ } else {
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j + 100, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 100, v, vl);
+ }
+ }
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + j + 200), vl);
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + j + 200), v, vl);
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + j + 300), vl);
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + j + 300), v2, vl);
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
+{ /* Inside the outer j loop, only the taken arm of the cond branch has
+     vector code (a plain int8 mf8 copy); the else arm is scalar.  Both join
+     into a float32 mf2 loop with a plain copy plus a masked _tumu
+     load/store.  All vector code uses vl 101.  The dg-final scans after
+     this function expect two vsetvli in total, one of them e32, mf2, tu,
+     mu.  */
+ vbool64_t mask = *(vbool64_t*) (in + 1000000);
+ size_t vl = 101;
+ for (size_t j = 0; j < m; j++){
+
+ if (cond) {
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+ }
+ } else {
+ for (size_t i = 0; i < cond; i++) /* cond is 0 on this path, so the bound is 0.  */
+ out[i] = out[i] * 33;
+ }
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + j + 200), vl);
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + j + 200), v, vl);
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + j + 300), vl);
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + j + 300), v2, vl);
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+void f2 (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl)
+{ /* No outer loop: a cond branch whose two arms are plain int8 mf8 copy
+     loops (offsets 0 and 1000), followed by a float32 mf2 loop with a plain
+     copy plus a masked _tumu load/store.  The dg-final scans after this
+     function expect three vsetvli in total, two of them e32, mf2, tu, mu.
+     m is unused.  */
+ vbool64_t mask = *(vbool64_t*) (in + 1000000);
+
+ vl = 101; /* The incoming vl argument is overwritten before any use.  */
+ if (cond > 0) {
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+ }
+ } else {
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 1000, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 1000, v, vl);
+ }
+ }
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + 200), vl);
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + 200), v, vl);
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + 300), vl);
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + 300), v2, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*tu,\s*mu} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+void f2 (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl)
+{ /* A cond branch whose two arms are single (non-loop) int8 mf8 copies at
+     offsets 0 and 10000, followed by a float32 mf2 loop with a plain copy
+     plus a masked _tumu load/store.  The dg-final scans after this
+     function expect three vsetvli in total, two of them e32, mf2, tu, mu.
+     m is unused.  */
+ vbool64_t mask = *(vbool64_t*) (in + 1000000);
+
+ vl = 101; /* The incoming vl argument is overwritten before any use.  */
+ if (cond > 0) {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, vl);
+ __riscv_vse8_v_i8mf8 (out, v, vl);
+ } else {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + 10000, vl);
+ __riscv_vse8_v_i8mf8 (out + 10000, v, vl);
+ }
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + 200), vl);
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + 200), v, vl);
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + 300), vl);
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + 300), v2, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*tu,\s*mu} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+void f2 (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl)
+{ /* A cond branch whose arms use different element types for a single
+     copy: int8 mf8 when cond > 0, int16 mf4 otherwise.  It is followed by
+     a float32 mf2 loop with a plain copy plus a masked _tumu load/store.
+     The dg-final scans after this function expect three vsetvli in total,
+     two of them e32, mf2, tu, mu.  m is unused.  */
+ vbool64_t mask = *(vbool64_t*) (in + 1000000);
+
+ vl = 101; /* The incoming vl argument is overwritten before any use.  */
+ if (cond > 0) {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, vl);
+ __riscv_vse8_v_i8mf8 (out, v, vl);
+ } else {
+ vint16mf4_t v = __riscv_vle16_v_i16mf4 ((int16_t *)(in + 10000), vl);
+ __riscv_vse16_v_i16mf4 ((int16_t *)(out + 10000), v, vl);
+ }
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + 200), vl);
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + 200), v, vl);
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + 300), vl);
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + 300), v2, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*tu,\s*mu} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (void * restrict in, void * restrict out, int n, size_t vl)
+{ /* Copy loop whose AVL is vl << 3, computed once into vl before the loop
+     rather than at each use.  The dg-final scans after this function
+     expect one slli and one vsetvli.  */
+ vl = vl << 3;
+ for (int i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {\.L[0-9]+\:\s+vle8\.v\s+v[0-9]+,\s*0\s*\([a-x0-9]+\)} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {slli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl)
+{ /* A cond branch with vector code in one arm only (a single int8 mf8
+     copy) and a scalar update of out[100] in the other, followed by a
+     float32 mf2 loop with a plain copy plus a masked _tumu load/store.
+     The dg-final scans after this function expect three vsetvli in total,
+     two of them e32, mf2, tu, mu.  m is unused.  */
+ vbool64_t mask = *(vbool64_t*) (in + 1000000);
+
+ vl = 101; /* The incoming vl argument is overwritten before any use.  */
+ if (cond > 0) {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, vl);
+ __riscv_vse8_v_i8mf8 (out, v, vl);
+ } else {
+ out[100] = out[100] + 300; /* Scalar-only arm: no intrinsic on this path.  */
+ }
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + 200), vl);
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + 200), v, vl);
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + 300), vl);
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + 300), v2, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*tu,\s*mu} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int cond)
+{ /* AVL is a runtime value loaded from memory at in + 10000.  Two loops
+     share it: a plain int8 mf8 copy, then a loop with a plain copy plus a
+     _tu load/store.  The dg-final scans after this function expect one
+     vsetvli overall, with the tu policy.  cond is unused.  */
+ size_t vl = *(size_t*)(in + 10000);
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 300, v, vl);
+ }
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int cond)
+{ /* AVL is computed arithmetically from cond.  Two loops share it: a plain
+     int8 mf8 copy, then a loop with a plain copy plus a _tu load/store.
+     The dg-final scans after this function expect one vsetvli overall,
+     with the tu policy.  */
+ size_t vl = ((cond + 100) * cond) >> 3;
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 300, v, vl);
+ }
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+char fn3 (void); /* Declared only; its char result is used as the AVL in f.  */
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int cond)
+{ /* AVL is the return value of a call to fn3.  Two loops share it: a plain
+     int8 mf8 copy, then a loop with a plain copy plus a _tu load/store.
+     The dg-final scans after this function expect one vsetvli overall,
+     with the tu policy.  cond is unused.  */
+ size_t vl = fn3 ();
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 300, v, vl);
+ }
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int cond)
+{ /* AVL is produced by a volatile asm statement (li 101), so the compiler
+     sees it as an opaque register value.  Two loops share it: a plain int8
+     mf8 copy, then a loop with a plain copy plus a _tu load/store.  The
+     dg-final scans after this function expect one vsetvli overall, with
+     the tu policy.  cond is unused.  */
+ size_t vl;
+ asm volatile ("li %0, 101" :"=r" (vl)::"memory");
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 300, v, vl);
+ }
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+static int vl = 0x5545515; /* File-scope value passed as vl to every intrinsic in f.  */
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int cond)
+{ /* Two loops: a plain load/store copy, then a loop that also does a _tu
+     load.  Both read the file-scope vl.  */
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 300, v, vl);
+ }
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int cond)
+{ /* Every intrinsic call passes the literal 555 as vl.  Two loops: a plain
+     load/store copy, then a loop that also does a _tu load.  */
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300,555);
+ __riscv_vse8_v_i8mf8 (out + i + 300, v,555);
+ }
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i,555);
+ __riscv_vse8_v_i8mf8 (out + i, v,555);
+
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100,555);
+ __riscv_vse8_v_i8mf8 (out + i + 100, v2,555);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl)
+{ /* The vl parameter is overwritten by the asm statement.  An int8/mf8 copy
+     runs only when cond > 0 (otherwise a scalar update of out[100]); the loop
+     then uses float32/mf2 accesses, including a masked _tumu load and a
+     masked store.  */
+ asm volatile ("li %0, 101" :"=r" (vl)::"memory");
+ vbool64_t mask = *(vbool64_t*) (in + 1000000);
+ if (cond > 0) {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, vl);
+ __riscv_vse8_v_i8mf8 (out, v, vl);
+ } else {
+ out[100] = out[100] + 300;
+ }
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + 200), vl);
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + 200), v, vl);
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + 300), vl);
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + 300), v2, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*tu,\s*mu} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {\.L[0-9]+\:\s+addi\s+\s*[a-x0-9]+,\s*[a-x0-9]+,\s*[0-9]00\s+addi\s+\s*[a-x0-9]+,\s*[a-x0-9]+,\s*[0-9]00\s+addi\s+\s*[a-x0-9]+,\s*[a-x0-9]+,\s*[0-9]00\s+add\s+\s*[a-x0-9]+,\s*[a-x0-9]+,\s*[a-x0-9]+\s+\.L[0-9]+\:} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int8_t * restrict out2, int n, int m, unsigned cond, size_t vl)
+{ /* vl is overwritten with 22 and the vector loop passes the literal 22.  A
+     scalar loop sits between the conditional int8/mf8 copy (taken when
+     cond == 0) and the float32/mf2 loop with its masked _tumu load.  */
+ vl = 22;
+ vbool64_t mask = *(vbool64_t*) (in + 1000000);
+ if (cond == 0) {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, vl);
+ __riscv_vse8_v_i8mf8 (out, v, vl);
+ } else {
+ out2[100] = out2[100] + 300;
+ }
+
+ for (size_t i = 0; i < n; i++)
+ out[i + 200] = out[i + 500] + 22;
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + 200), 22);
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + 200), v, 22);
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + 300), 22);
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + 300), v2, 22);
+ }
+}
+
+void f2 (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned cond, size_t vl)
+{ /* The vl parameter is overwritten by the asm statement and used by every
+     intrinsic.  A scalar loop sits between the conditional int8/mf8 copy
+     (taken when cond > 0) and the float32/mf2 loop with its masked _tumu
+     load.  */
+ asm volatile ("li %0, 101" :"=r" (vl)::"memory");
+ vbool64_t mask = *(vbool64_t*) (in + 1000000);
+ if (cond > 0) {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, vl);
+ __riscv_vse8_v_i8mf8 (out, v, vl);
+ } else {
+ out[100] = out[100] + 300;
+ }
+
+ for (size_t i = 0; i < n; i++)
+ out[i + 200] = out[i + 500] + 555;
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + i + 200), vl);
+ __riscv_vse32_v_f32mf2 ((float *)(out + i + 200), v, vl);
+
+ vfloat32mf2_t v2 = __riscv_vle32_v_f32mf2_tumu (mask, v, (float *)(in + i + 300), vl);
+ __riscv_vse32_v_f32mf2_m (mask, (float *)(out + i + 300), v2, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,22,\s*e32,\s*mf2,\s*tu,\s*mu} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*tu,\s*mu} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 4 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t *base, int8_t *out, size_t m, size_t n) { /* One _tu load per
+     iteration, stored to one of two addresses depending on n > 100.  */
+ int vl = 101;
+ for (size_t i = 0; i < m; i++) {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8_tu(v0, base + i, vl); /* NOTE(review): v0 is the merge operand of its own
+     initializer, so it is read before being set; presumably deliberate for this test -- confirm.  */
+ if (n > 100) {
+ __riscv_vse8_v_i8mf8(out + i + 100, v0, vl);
+ } else {
+ __riscv_vse8_v_i8mf8(out + i, v0, vl);
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (void * restrict in, void * restrict out, int l, int n, int m, size_t vl)
+{ /* Three nested loops; the load/store address uses only i and j, so the
+     innermost k loop repeats the same access.  vl is the unmodified
+     parameter.  */
+ for (int i = 0; i < l; i++){
+ for (int j = 0; j < m; j++){
+ for (int k = 0; k < n; k++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j, vl);
+ __riscv_vse8_v_i8mf8 (out + i + j, v, vl);
+ }
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {\.L[0-9]+\:\s+vle8\.v\s+v[0-9]+,\s*0\s*\([a-x0-9]+\)} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t *base, int8_t *out, size_t m, size_t n) { /* The inner loop
+     counter j is passed as vl to the load, the _tu load and the store, while
+     the addresses depend only on i.  */
+ for (size_t i = 0; i < m; i++) {
+ for (size_t j = 0; j < n; j += 1) {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i, j);
+ v0 = __riscv_vle8_v_i8mf8_tu(v0, base + i + 100, j);
+ __riscv_vse8_v_i8mf8(out + i, v0, j);
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {\.L[0-9]+\:\s+vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]\s+vle8\.v\s+v[0-9]+,\s*0\s*\([a-x0-9]+\)} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t *base, int8_t *out, size_t m, size_t n) { /* One _tu load per
+     iteration, stored to one of two addresses depending on n > 100.  */
+ int vl = 101;
+ for (size_t i = 0; i < m; i++) {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8_tu(v0, base + i, vl); /* NOTE(review): v0 is the merge operand of its own
+     initializer, so it is read before being set; presumably deliberate for this test -- confirm.  */
+ if (n > 100) {
+ __riscv_vse8_v_i8mf8(out + i + 100, v0, vl);
+ } else {
+ __riscv_vse8_v_i8mf8(out + i, v0, vl);
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t *base, int8_t *out, size_t m, size_t n) { /* The outer loop
+     counter i is passed as vl; the inner j loop changes neither the
+     addresses nor vl.  */
+ for (size_t i = 0; i < m; i++) {
+ for (size_t j = 0; j < n; j += 1) {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i, i);
+ v0 = __riscv_vle8_v_i8mf8_tu(v0, base + i + 100, i);
+ __riscv_vse8_v_i8mf8(out + i, v0, i);
+ }
+ }
+}
+/* { dg-final { scan-assembler {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]\s+\.L[0-9]+\:\s+vle8\.v\s+v[0-9]+,\s*0\s*\([a-x0-9]+\)} { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f(int8_t *base, int8_t *out, size_t vl, size_t m) { /* The mask is read
+     from base + 10000; each iteration does a masked _mu load followed by an
+     unmasked store.  */
+ vbool64_t mask = *(vbool64_t*) (base + 10000);
+ for (size_t i = 0; i < m; i++) {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8_mu(mask, v0, base + i, vl); /* NOTE(review): v0 is the merge operand of its own
+     initializer, so it is read before being set; presumably deliberate for this test -- confirm.  */
+ __riscv_vse8_v_i8mf8(out + i, v0, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {\.L[0-9]+\:\s+vle8\.v\s+v[0-9]+,\s*0\s*\([a-x0-9]+\),v0\.t} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*mu} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f(int8_t *base, int8_t *out, size_t vl, size_t m) { /* Even i: masked
+     _tumu load merging into v0.  Odd i: v0 is stored.  */
+ vbool64_t mask = *(vbool64_t*) (base + 10000);
+ vint8mf8_t v0; /* No initializer: the first _tumu load merges into an unset value.  */
+ for (size_t i = 0; i < m; i++) {
+ if (i % 2 == 0) {
+ v0 = __riscv_vle8_v_i8mf8_tumu(mask, v0, base + i, vl);
+ } else {
+ __riscv_vse8_v_i8mf8(out + i, v0, vl);
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*mu\s+j\s+\.L[0-9]+} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void foo1_7(int8_t *base, int8_t *out, size_t vl, size_t m, size_t n, size_t o, size_t p) { /* The loop
+     (load, _tu load, store) runs only when o > p; avl is a plain copy of the
+     vl parameter.  */
+ size_t avl = vl;
+ if (o > p) {
+ for (size_t i = 0; i < m; i++) {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i, avl);
+ v0 = __riscv_vle8_v_i8mf8_tu(v0, base + i + 100, avl);
+ __riscv_vse8_v_i8mf8(out + i, v0, avl);
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {\.L[0-9]+\:\s+vle8\.v\s+v[0-9]+,\s*0\s*\([a-x0-9]+\)} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int cond)
+{ /* vl is the constant 101 throughout.  Only the n > cond branch contains a
+     _tu load; the else branch and the trailing loop are plain load/store
+     copies.  */
+ int vl = 101;
+ if (n > cond) {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + 600, vl);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + 600, vl);
+ __riscv_vse8_v_i8mf8 (out + 600, v2, vl);
+ } else {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + 700, vl);
+ __riscv_vse8_v_i8mf8 (out + 700, v, vl);
+ }
+
+ for (int i = 0 ; i < n * n * n * n; i++) {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + 900 + i, vl);
+ __riscv_vse8_v_i8mf8 (out + 900 + i, v, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int cond)
+{ /* Every intrinsic takes cond as its vl argument.  The local vl written by
+     the asm statement is not read afterwards.  Three scalar loops separate
+     the if/else from the final vector loop.  */
+ size_t vl;
+ asm volatile ("li %0, 101" :"=r" (vl)::"memory");
+ if (n > cond) {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + 600, cond);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + 600, cond);
+ __riscv_vse8_v_i8mf8 (out + 600, v2, cond);
+ } else {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + 700, cond);
+ __riscv_vse8_v_i8mf8 (out + 700, v, cond);
+ }
+
+ for (int i = 0 ; i < n * n; i++)
+ out[i] = out[i] + out[i];
+
+ for (int i = 0 ; i < n * n * n; i++)
+ out[i] = out[i] * out[i];
+
+ for (int i = 0 ; i < n * n * n * n; i++)
+ out[i] = out[i] * out[i];
+
+ for (int i = 0 ; i < n * n * n * n; i++) {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + 900 + i, cond);
+ __riscv_vse8_v_i8mf8 (out + 900 + i, v, cond);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int n2)
+{ /* The asm-written vl is used by the first loop (which has a _tu load) and
+     by the last loop (plain copy); three scalar loops sit between them.  */
+ size_t vl;
+ asm volatile ("li %0, 101" :"=r" (vl)::"memory");
+ for (int i = 0 ; i < n2; i++) {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + 800 + i, vl);
+ v = __riscv_vle8_v_i8mf8_tu (v, in + 900 + i, vl);
+ __riscv_vse8_v_i8mf8 (out + 800 + i, v, vl);
+ }
+
+ for (int i = 0 ; i < n * n; i++)
+ out[i] = out[i] + out[i];
+
+ for (int i = 0 ; i < n * n * n; i++)
+ out[i] = out[i] * out[i];
+
+ for (int i = 0 ; i < n * n * n * n; i++)
+ out[i] = out[i] * out[i];
+
+ for (int i = 0 ; i < n * n * n * n; i++) {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + 900 + i, vl);
+ __riscv_vse8_v_i8mf8 (out + 900 + i, v, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int n2)
+{ /* vl is written by asm twice: once before the first vector loop and again
+     (with a different immediate) before the last one.  Three scalar loops
+     sit between the two vector loops.  */
+ size_t vl;
+ asm volatile ("li %0, 101" :"=r" (vl)::"memory");
+ for (int i = 0 ; i < n2; i++) {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + 800 + i, vl);
+ __riscv_vse8_v_i8mf8 (out + 800 + i, v, vl);
+ }
+
+ for (int i = 0 ; i < n * n; i++)
+ out[i] = out[i] + out[i];
+
+ for (int i = 0 ; i < n * n * n; i++)
+ out[i] = out[i] * out[i];
+
+ for (int i = 0 ; i < n * n * n * n; i++)
+ out[i] = out[i] * out[i];
+
+ asm volatile ("li %0, 102" :"=r" (vl)::"memory");
+ for (int i = 0 ; i < n * n * n * n; i++) {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + 900 + i, vl);
+ __riscv_vse8_v_i8mf8 (out + 900 + i, v, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (void * restrict in, void * restrict out, int n, int vl)
+{ /* vl is incremented at the end of every iteration, so each load/store
+     pair is issued with a different vl value.  */
+ for (int i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+ vl++;
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]\s+vle8\.v\s+v[0-9]+,\s*0\s*\([a-x0-9]+\)} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f(void *base, void *out, void *mask_in, size_t m) { /* Constant vl of
+     105.  Even i: int8/mf8 load, _tu load, store.  Odd i: int16/mf4 load,
+     masked _mu load, store.  */
+ vbool64_t mask = *(vbool64_t*)mask_in;
+ size_t vl = 105;
+ for (size_t i = 0; i < m; i++) {
+ if (i % 2 == 0) {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i, vl);
+ vint8mf8_t v1 = __riscv_vle8_v_i8mf8_tu(v0, base + i + 100, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v1, vl);
+ } else {
+ vint16mf4_t v0 = __riscv_vle16_v_i16mf4(base + i, vl);
+ vint16mf4_t v1 = __riscv_vle16_v_i16mf4_mu(mask, v0, base + i + 100, vl);
+ __riscv_vse16_v_i16mf4 (out + i, v1, vl);
+ }
+ }
+}
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f(void *base, void *out, void *mask_in, size_t m, size_t n) { /* Constant
+     vl of 106 inside two nested loops.  The branch is chosen by the parity
+     of i + j: int8/mf8 with a _tu load, or int16/mf4 with a masked _mu
+     load.  */
+ vbool64_t mask = *(vbool64_t*)mask_in;
+ size_t vl = 106;
+ for (size_t i = 0; i < m; i++) {
+ for (size_t j = 0; j < n; j++){
+ if ((i + j) % 2 == 0) {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + j, vl);
+ vint8mf8_t v1 = __riscv_vle8_v_i8mf8_tu(v0, base + i + j + 100, vl);
+ __riscv_vse8_v_i8mf8 (out + i + j, v1, vl);
+ } else {
+ vint16mf4_t v0 = __riscv_vle16_v_i16mf4(base + i + j, vl);
+ vint16mf4_t v1 = __riscv_vle16_v_i16mf4_mu(mask, v0, base + i + j + 100, vl);
+ __riscv_vse16_v_i16mf4 (out + i + j, v1, vl);
+ }
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f(void *base, void *out, void *mask_in, size_t m, size_t n) { /* Constant
+     vl of 107.  Even i: each j iteration does an int8/mf8 load, _tu load and
+     store, then an int8/mf8 or int16/mf4 copy chosen by the parity of j.
+     Odd i: each j iteration does an int8/mf8 load, _tu load and store.
+     mask_in is not used.  */
+
+ size_t vl = 107;
+ for (size_t i = 0; i < m; i++) {
+ if (i % 2 == 0) {
+ for (size_t j = 0; j < n; j++){
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + j + 700, vl);
+ vint8mf8_t v1 = __riscv_vle8_v_i8mf8_tu(v0, base + i + j + 700, vl);
+ __riscv_vse8_v_i8mf8 (out + i + j + 700, v1, vl);
+ if (j % 2 == 0) {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + j + 500, vl);
+ __riscv_vse8_v_i8mf8 (out + i + j + 500, v0, vl);
+ } else {
+ vint16mf4_t v0 = __riscv_vle16_v_i16mf4(base + i + j + 600, vl);
+ __riscv_vse16_v_i16mf4 (out + i + j + 600, v0, vl);
+ }
+ }
+ } else {
+ for (size_t j = 0; j < n; j++){
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + j + 200, vl);
+ vint8mf8_t v1 = __riscv_vle8_v_i8mf8_tu(v0, base + i + j + 300, vl);
+ __riscv_vse8_v_i8mf8 (out + i + j + 400, v1, vl);
+ }
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f(void *base, void *out, void *mask_in, size_t m, size_t n) { /* Constant
+     vl of 222.  Even i: an int8/mf8 or int16/mf4 copy chosen by the parity
+     of j.  Odd i: int8/mf8 load, _tu load and store.  mask_in is not
+     used.  */
+
+ size_t vl = 222;
+ for (size_t i = 0; i < m; i++) {
+ if (i % 2 == 0) {
+ for (size_t j = 0; j < n; j++){
+ if (j % 2 == 0) {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + j + 500, vl);
+ __riscv_vse8_v_i8mf8 (out + i + j + 500, v0, vl);
+ } else {
+ vint16mf4_t v0 = __riscv_vle16_v_i16mf4(base + i + j + 600, vl);
+ __riscv_vse16_v_i16mf4 (out + i + j + 600, v0, vl);
+ }
+ }
+ } else {
+ for (size_t j = 0; j < n; j++){
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + j + 200, vl);
+ vint8mf8_t v1 = __riscv_vle8_v_i8mf8_tu(v0, base + i + j + 300, vl);
+ __riscv_vse8_v_i8mf8 (out + i + j + 400, v1, vl);
+ }
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f(void *base, void *out, void *mask_in, size_t m, size_t n) { /* Constant
+     vl of 333.  Even i: both j-parity branches do a load, _tu load and store
+     at addresses that do not depend on j.  Odd i: plain int8/mf8 copy.
+     mask_in is not used.  */
+
+ size_t vl = 333;
+ for (size_t i = 0; i < m; i++) {
+ if (i % 2 == 0) {
+ for (size_t j = 0; j < n; j++){
+ if (j % 2 == 0) {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + 200, vl);
+ vint8mf8_t v1 = __riscv_vle8_v_i8mf8_tu(v0, base + i + 200, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 200, v1, vl);
+ } else {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + 300, vl);
+ vint8mf8_t v1 = __riscv_vle8_v_i8mf8_tu(v0, base + i + 300, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 300, v1, vl);
+ }
+ }
+ } else {
+ for (size_t j = 0; j < n; j++){
+ vint8mf8_t v1 = __riscv_vle8_v_i8mf8(base + i + j + 300, vl);
+ __riscv_vse8_v_i8mf8 (out + i + j + 400, v1, vl);
+ }
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f(void *base, void *out, void *mask_in, size_t m, size_t n) { /* Constant
+     vl of 444.  Even i: plain int8/mf8 copies for either parity of j.  Odd
+     i: j runs up to vl; even j runs a k loop with a _tu load, odd j does a
+     plain copy.  mask_in is not used.  */
+
+ size_t vl = 444;
+ for (size_t i = 0; i < m; i++) {
+ if (i % 2 == 0) {
+ for (size_t j = 0; j < n; j++){
+ if (j % 2 == 0) {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + 200 + j, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 200, v0, vl);
+ } else {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + 300 + j, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 300, v0, vl);
+ }
+ }
+ } else {
+ for (size_t j = 0; j < vl; j++){
+ if (j % 2 == 0) {
+ for (size_t k = 0; k < n; k++) {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + 500 + k + j, vl);
+ vint8mf8_t v1 = __riscv_vle8_v_i8mf8_tu(v0, base + i + 600 + k + j, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 600, v1, vl);
+ }
+ } else {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + 700, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 800, v0, vl);
+ }
+ }
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f(void *base, void *out, void *mask_in, size_t m, size_t n) { /* Constant
+     vl of 555.  Even i: even j is a plain copy, odd j adds a _tu load.  Odd
+     i: j runs up to vl and every path is a plain int8/mf8 copy.  mask_in is
+     not used.  */
+
+ size_t vl = 555;
+ for (size_t i = 0; i < m; i++) {
+ if (i % 2 == 0) {
+ for (size_t j = 0; j < n; j++){
+ if (j % 2 == 0) {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + 200 + j, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 200, v0, vl);
+ } else {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + 300 + j, vl);
+ vint8mf8_t v1 = __riscv_vle8_v_i8mf8_tu(v0, base + i + 300 + j, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 300, v1, vl);
+ }
+ }
+ } else {
+ for (size_t j = 0; j < vl; j++){
+ if (j % 2 == 0) {
+ for (size_t k = 0; k < n; k++) {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + 500 + k + j, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 600, v0, vl);
+ }
+ } else {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + 700, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 800, v0, vl);
+ }
+ }
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f(void *base, void *out, void *mask_in,
+size_t m, size_t n, size_t a, size_t b) { /* Constant vl of 666.  The only
+     _tu load sits at the bottom of five nested loops (i, j, k, i_a, i_b) on
+     the odd-i, even-j path; every other path is a plain int8/mf8 copy.
+     mask_in is not used.  */
+
+ size_t vl = 666;
+ for (size_t i = 0; i < m; i++) {
+ if (i % 2 == 0) {
+ for (size_t j = 0; j < n; j++){
+ if (j % 2 == 0) {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + 200 + j, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 200, v0, vl);
+ } else {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + 300 + j, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 300, v0, vl);
+ }
+ }
+ } else {
+ for (size_t j = 0; j < vl; j++){
+ if (j % 2 == 0) {
+ for (size_t k = 0; k < n; k++) {
+ for (size_t i_a = 0; i_a < a; i_a++){
+ for (size_t i_b = 0; i_b < b; i_b++){
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + 500 + k + j + i_a + i_b, vl);
+ vint8mf8_t v1 = __riscv_vle8_v_i8mf8_tu(v0, base + i + 600 + k + j + i_a + i_b, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 600 + j + k + i_a + i_b, v1, vl);
+ }
+ }
+ }
+ } else {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + 700, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 800, v0, vl);
+ }
+ }
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f(void *base, void *out, void *mask_in,
+size_t m, size_t n, size_t a, size_t b) { /* Constant vl of 345.  The only
+     _tu load sits under the k, i_a and i_b loops on the even-i, even-j path;
+     every other path is a plain int8/mf8 copy.  On odd i the j loop runs up
+     to vl.  mask_in is not used.  */
+
+ size_t vl = 345;
+ for (size_t i = 0; i < m; i++) {
+ if (i % 2 == 0) {
+ for (size_t j = 0; j < n; j++){
+ if (j % 2 == 0) {
+ for (size_t k = 0; k < n; k++) {
+ for (size_t i_a = 0; i_a < a; i_a++){
+ for (size_t i_b = 0; i_b < b; i_b++){
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + 500 + k + j + i_a + i_b, vl);
+ vint8mf8_t v1 = __riscv_vle8_v_i8mf8_tu(v0, base + i + 600 + k + j + i_a + i_b, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 600 + j + k + i_a + i_b, v1, vl);
+ }
+ }
+ }
+ } else {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + 300 + j, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 300, v0, vl);
+ }
+ }
+ } else {
+ for (size_t j = 0; j < vl; j++){
+ if (j % 2 == 0) {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + 200 + j, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 200, v0, vl);
+ } else {
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + 700, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 800, v0, vl);
+ }
+ }
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f(void *base, void *out, void *mask_in,
+size_t m, size_t n, size_t a, size_t b) { /* Constant vl of 99.  Vector code
+     exists only on the even-i, even-j path, under the k, i_a and i_b loops;
+     the odd-j else branch is empty and odd i does nothing.  mask_in is not
+     used.  */
+
+ size_t vl = 99;
+ for (size_t i = 0; i < m; i++) {
+ if (i % 2 == 0) {
+ for (size_t j = 0; j < n; j++){
+ if (j % 2 == 0) {
+ for (size_t k = 0; k < n; k++) {
+ for (size_t i_a = 0; i_a < a; i_a++){
+ for (size_t i_b = 0; i_b < b; i_b++){
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + 500 + k + j + i_a + i_b, vl);
+ vint8mf8_t v1 = __riscv_vle8_v_i8mf8_tu(v0, base + i + 600 + k + j + i_a + i_b, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 600 + j + k + i_a + i_b, v1, vl);
+ }
+ }
+ }
+ } else {
+ }
+ }
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (void * restrict in, void * restrict out, int l, int n, int m, size_t vl)
+{ /* vl is incremented once per outer i iteration, before the j and k loops,
+     so it is constant across the two inner loops.  */
+ for (int i = 0; i < l; i++){
+ vl++;
+ for (int j = 0; j < m; j++){
+ for (int k = 0; k < n; k++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j + k, vl);
+ __riscv_vse8_v_i8mf8 (out + i + j + k, v, vl);
+ }
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]\s+\.L[0-9]+\:\s+ble\s+[a-x0-9]+,\s*zero,\.L[0-9]+\s+\.L[0-9]+\:\s+add\s+\s*[a-x0-9]+,\s*[a-x0-9]+,\s*[a-x0-9]+\s+add\s+\s*[a-x0-9]+,\s*[a-x0-9]+,\s*[a-x0-9]+\s+\.L[0-9]+\:\s+vle8\.v\s+v[0-9]+,\s*0\s*\([a-x0-9]+\)} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+/* Test input: a five-deep loop nest whose innermost body performs an e8/mf8
+   load, a second load through the _tu intrinsic that takes the first result
+   as its first operand, and a store, all with the constant AVL vl = 999.
+   The body runs only when both i and j are even; mask_in is never used.
+   The scan-assembler directives after the function expect exactly one
+   vsetvli, and it must use the tu policy.  */
+void f(void *base, void *out, void *mask_in,
+size_t m, size_t n, size_t a, size_t b) {
+
+ size_t vl = 999;
+ for (size_t i = 0; i < m; i++) {
+ if (i % 2 == 0) {
+ for (size_t j = 0; j < n; j++){
+ if (j % 2 == 0) {
+ for (size_t k = 0; k < n; k++) {
+ for (size_t i_a = 0; i_a < a; i_a++){
+ for (size_t i_b = 0; i_b < b; i_b++){
+ vint8mf8_t v0 = __riscv_vle8_v_i8mf8(base + i + 500 + k + j + i_a + i_b, vl);
+ vint8mf8_t v1 = __riscv_vle8_v_i8mf8_tu(v0, base + i + 600 + k + j + i_a + i_b, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 600 + j + k + i_a + i_b, v1, vl);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+/* Test input: a single loop in which every iteration takes one of two arms.
+   When i != cond it copies an e8/mf8 vector with AVL 55; otherwise it copies
+   an e32/m1 vector with AVL 66.  The scan-assembler directives after the
+   function expect one e32,m1 vsetvli and two e8,mf8 vsetvli, three in
+   total.  */
+void f (void * restrict in, void * restrict out, size_t n, size_t cond)
+{
+ for (size_t i = 0; i < n; i++)
+ {
+ if (i != cond) {
+ size_t vl = 55;
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 100, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 100, v, vl);
+ } else {
+ size_t vl = 66;
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in + i + 200, vl);
+ __riscv_vse32_v_i32m1 (out + i + 200, v, vl);
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+/* Test input: a single loop in which every iteration takes one of two arms.
+   When i == cond it copies an e8/mf8 vector with AVL 55; otherwise it copies
+   an e32/m1 vector with AVL 66.  The scan-assembler directives after the
+   function expect two e32,m1 vsetvli and one e8,mf8 vsetvli, three in
+   total.  */
+void f (void * restrict in, void * restrict out, size_t n, size_t cond)
+{
+ for (size_t i = 0; i < n; i++)
+ {
+ if (i == cond) {
+ size_t vl = 55;
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 100, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 100, v, vl);
+ } else {
+ size_t vl = 66;
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in + i + 200, vl);
+ __riscv_vse32_v_i32m1 (out + i + 200, v, vl);
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+/* Test input: with the constant AVL vl = 111, the n > cond arm does an
+   e8/mf8 load, a _tu load into that result and a store at offset 600, while
+   the other arm does a plain e8/mf8 load/store at offset 700.  A following
+   loop of n * n * n * n iterations copies vint8mf8_t values by dereferencing
+   cast pointers, i.e. without passing any vl.  The scan-assembler directive
+   after the function expects three vsetvli in total.  */
+void f (int8_t * restrict in, int8_t * restrict out, int n, int cond)
+{
+ size_t vl = 111;
+ if (n > cond) {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + 600, vl);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + 600, vl);
+ __riscv_vse8_v_i8mf8 (out + 600, v2, vl);
+ } else {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + 700, vl);
+ __riscv_vse8_v_i8mf8 (out + 700, v, vl);
+ }
+
+ for (int i = 0 ; i < n * n * n * n; i++) {
+ vint8mf8_t v = *(vint8mf8_t*) (in + 900 + i);
+ *(vint8mf8_t*) (out + 900 + i) = v;
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-funroll-loops" no-opts "-g" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+/* Test input: starts with vl = 777.  The n > cond arm does an e8/mf8 load, a
+   _tu load into that result and a store at offset 600; the other arm does a
+   plain e8/mf8 load/store at offset 700.  Then follow, in order: a loop
+   copying vint8mf8_t values through cast pointers (no vl argument), an
+   e32/m1 copy loop still using vl = 777, an e32/m1 copy loop after vl is
+   set to 888, and an e32/m1 copy loop of n * n iterations after vl is set
+   to 444.  The scan-assembler directive after the function expects six
+   vsetvli in total.  */
+void f (void * restrict in, void * restrict out, int n, int cond)
+{
+ size_t vl = 777;
+ if (n > cond) {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + 600, vl);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + 600, vl);
+ __riscv_vse8_v_i8mf8 (out + 600, v2, vl);
+ } else {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + 700, vl);
+ __riscv_vse8_v_i8mf8 (out + 700, v, vl);
+ }
+
+ for (int i = 0 ; i < n * n * n * n; i++) {
+ vint8mf8_t v = *(vint8mf8_t*) (in + 900 + i);
+ *(vint8mf8_t*) (out + 900 + i) = v;
+ }
+
+ for (int i = 0 ; i < n; i++) {
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in + 1000 + i, vl);
+ __riscv_vse32_v_i32m1 (out + 1000 + i, v, vl);
+ }
+
+ vl = 888;
+ for (int i = 0 ; i < n; i++) {
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in + 1000 + i, vl);
+ __riscv_vse32_v_i32m1 (out + 1000 + i, v, vl);
+ }
+
+ vl = 444;
+ for (int i = 0 ; i < n * n; i++) {
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in + 2000 + i, vl);
+ __riscv_vse32_v_i32m1 (out + 2000 + i, v, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 6 { target { no-opts "-O0" no-opts "-funroll-loops" no-opts "-g" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+/* Test input: vl is written by a volatile asm statement (li 101) instead of
+   a C initializer -- presumably so the AVL is opaque to the optimizer;
+   confirm.  An e8/mf8 copy loop comes first, then three scalar loops that
+   update out[] in place, then a loop of n2 iterations doing an e8/mf8 load,
+   a _tu load into that result and a store.  The scan-assembler directives
+   after the function expect two vsetvli in total, both with the tu
+   policy.  */
+void f (int8_t * restrict in, int8_t * restrict out, int n, int n2)
+{
+ size_t vl;
+ asm volatile ("li %0, 101" :"=r" (vl)::"memory");
+
+ for (int i = 0 ; i < n * n * n * n; i++) {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + 900 + i, vl);
+ __riscv_vse8_v_i8mf8 (out + 900 + i, v, vl);
+ }
+
+ for (int i = 0 ; i < n * n; i++)
+ out[i] = out[i] + out[i];
+
+ for (int i = 0 ; i < n * n * n; i++)
+ out[i] = out[i] * out[i];
+
+ for (int i = 0 ; i < n * n * n * n; i++)
+ out[i] = out[i] * out[i];
+
+ for (int i = 0 ; i < n2; i++) {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + 800 + i, vl);
+ v = __riscv_vle8_v_i8mf8_tu (v, in + 900 + i, vl);
+ __riscv_vse8_v_i8mf8 (out + 800 + i, v, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+/* Test input: constant AVL vl = 555.  When cond is non-zero a triple loop
+   nest copies e8/mf8 vectors; otherwise only the scalar out[999] is updated.
+   A scalar triple loop nest follows, and then a triple loop nest doing an
+   e8/mf8 load, a _tu load into that result and a store.  The
+   scan-assembler directives after the function expect two vsetvli in
+   total, exactly one of which uses the tu policy.  */
+void f (int8_t * restrict in, int8_t * restrict out, int l, int n, int m, size_t cond)
+{
+ size_t vl = 555;
+
+ if (cond) {
+ for (int i = 0; i < l; i++){
+ for (int j = 0; j < m; j++){
+ for (int k = 0; k < n; k++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j + k, vl);
+ __riscv_vse8_v_i8mf8 (out + i + j + k, v, vl);
+ }
+ }
+ }
+ } else {
+ out[999] = out[999] * in[999];
+ }
+
+ for (int i = 0; i < l; i++){
+ for (int j = 0; j < m; j++){
+ for (int k = 0; k < n; k++)
+ {
+ out[i+j+k+10000000] = out[i+j+k+10000000] + in[i+j+k+10000000];
+ }
+ }
+ }
+
+ for (int i = 0; i < l; i++){
+ for (int j = 0; j < m; j++){
+ for (int k = 0; k < n; k++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j + k + 10000, vl);
+ v = __riscv_vle8_v_i8mf8_tu (v, in + i + j + k + 20000, vl);
+ __riscv_vse8_v_i8mf8 (out + i + j + k + 10000, v, vl);
+ }
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+/* Test input: a triple loop nest in which the innermost body does an e8/mf8
+   load with AVL vl, a _tu load into that result with AVL j, and a store with
+   AVL vl; after the j loop each i iteration does one more e8/mf8 load/store
+   with AVL vl.  The vl used throughout the loop body is the one declared
+   inside the i loop, which hides the function-level vl = 101.  The
+   scan-assembler directives after the function expect four e8,mf8 vsetvli
+   in total, exactly one of which uses the tu policy.  */
+void f2 (void * restrict in, void * restrict out, int l, int n, int m)
+{
+ size_t vl = 101;
+ for (int i = 0; i < l; i++){
+ size_t vl = i + vl + 44; /* NOTE(review): the vl in this initializer is the new, still-uninitialized inner vl, not the outer 101 -- confirm this is intended.  */
+ for (int j = 0; j < m; j++){
+ for (int k = 0; k < n; k++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j + k + 10000, vl);
+ v = __riscv_vle8_v_i8mf8_tu (v, in + i + j + k + 20000, j); /* AVL here is j, unlike the neighbouring calls.  */
+ __riscv_vse8_v_i8mf8 (out + i + j + k + 20000, v, vl);
+ }
+ }
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 4 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 4 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+/* Test input: a triple loop nest in which the innermost body does an e8/mf8
+   load, a _tu load into that result and a store, all with AVL vl; after the
+   j loop each i iteration does one more e8/mf8 load/store with the same vl.
+   The vl used throughout the loop body is the one declared inside the i
+   loop, which hides the function-level vl = 101.  The scan-assembler
+   directives after the function expect two vsetvli in total, both with the
+   tu policy.  */
+void f2 (void * restrict in, void * restrict out, int l, int n, int m)
+{
+ size_t vl = 101;
+ for (int i = 0; i < l; i++){
+ size_t vl = i + vl + 44; /* NOTE(review): the vl in this initializer is the new, still-uninitialized inner vl, not the outer 101 -- confirm this is intended.  */
+ for (int j = 0; j < m; j++){
+ for (int k = 0; k < n; k++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j + k + 10000, vl);
+ v = __riscv_vle8_v_i8mf8_tu (v, in + i + j + k + 20000, vl);
+ __riscv_vse8_v_i8mf8 (out + i + j + k + 20000, v, vl);
+ }
+ }
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+/* Test input: constant AVL vl = 555.  Three triple loop nests run in
+   sequence: an e8/mf8 vector copy, a scalar update of out[] at offset
+   10000000, and a second e8/mf8 vector copy at offset 10000.  The cond
+   parameter is never used.  The scan-assembler directives after the
+   function expect exactly one vsetvli.  */
+void f (int8_t * restrict in, int8_t * restrict out, int l, int n, int m, size_t cond)
+{
+ size_t vl = 555;
+ for (int i = 0; i < l; i++){
+ for (int j = 0; j < m; j++){
+ for (int k = 0; k < n; k++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j + k, vl);
+ __riscv_vse8_v_i8mf8 (out + i + j + k, v, vl);
+ }
+ }
+ }
+
+ for (int i = 0; i < l; i++){
+ for (int j = 0; j < m; j++){
+ for (int k = 0; k < n; k++)
+ {
+ out[i+j+k+10000000] = out[i+j+k+10000000] + in[i+j+k+10000000];
+ }
+ }
+ }
+
+ for (int i = 0; i < l; i++){
+ for (int j = 0; j < m; j++){
+ for (int k = 0; k < n; k++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j + k + 10000, vl);
+ __riscv_vse8_v_i8mf8 (out + i + j + k + 10000, v, vl);
+ }
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+/* Test input: the AVL is an explicit register variable bound to a5 that is
+   never assigned in this function; a single loop copies e8/mf8 vectors
+   using it.  The scan-assembler directives after the function expect
+   exactly one vsetvli, and its AVL operand must be a5.  */
+void f (void * restrict in, void * restrict out, int n)
+{
+ register size_t vl asm ("a5");
+ for (int i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*a5,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+/* Test input: constant AVL vl = 555.  Three triple loop nests run in
+   sequence: an e8/mf8 vector copy, a scalar update of out[] at offset
+   10000000, and a nest doing an e8/mf8 load, a _tu load into that result
+   and a store.  The cond parameter is never used.  The scan-assembler
+   directives after the function expect exactly one vsetvli, and it must
+   use the tu policy.  */
+void f (int8_t * restrict in, int8_t * restrict out, int l, int n, int m, size_t cond)
+{
+ size_t vl = 555;
+ for (int i = 0; i < l; i++){
+ for (int j = 0; j < m; j++){
+ for (int k = 0; k < n; k++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j + k, vl);
+ __riscv_vse8_v_i8mf8 (out + i + j + k, v, vl);
+ }
+ }
+ }
+
+ for (int i = 0; i < l; i++){
+ for (int j = 0; j < m; j++){
+ for (int k = 0; k < n; k++)
+ {
+ out[i+j+k+10000000] = out[i+j+k+10000000] + in[i+j+k+10000000];
+ }
+ }
+ }
+
+ for (int i = 0; i < l; i++){
+ for (int j = 0; j < m; j++){
+ for (int k = 0; k < n; k++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j + k + 10000, vl);
+ v = __riscv_vle8_v_i8mf8_tu (v, in + i + j + k + 20000, vl);
+ __riscv_vse8_v_i8mf8 (out + i + j + k + 10000, v, vl);
+ }
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+/* Test input: constant AVL vl = 555.  When cond is non-zero a triple loop
+   nest copies e8/mf8 vectors; otherwise a triple loop nest copies e32/mf2
+   vectors through pointers cast to int32_t *.  A scalar triple loop nest
+   follows, and then a triple loop nest doing an e8/mf8 load, a _tu load
+   into that result and a store.  The scan-assembler directives after the
+   function expect three vsetvli in total, exactly one of which is e8,mf8
+   with the tu policy.  */
+void f (int8_t * restrict in, int8_t * restrict out, int l, int n, int m, size_t cond)
+{
+ size_t vl = 555;
+
+ if (cond) {
+ for (int i = 0; i < l; i++){
+ for (int j = 0; j < m; j++){
+ for (int k = 0; k < n; k++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j + k, vl);
+ __riscv_vse8_v_i8mf8 (out + i + j + k, v, vl);
+ }
+ }
+ }
+ } else {
+ for (int i = 0; i < l; i++){
+ for (int j = 0; j < m; j++){
+ for (int k = 0; k < n; k++)
+ {
+ vint32mf2_t v = __riscv_vle32_v_i32mf2 ((int32_t *)(in + i + j + k), vl);
+ __riscv_vse32_v_i32mf2 ((int32_t *)(out + i + j + k), v, vl);
+ }
+ }
+ }
+ }
+
+ for (int i = 0; i < l; i++){
+ for (int j = 0; j < m; j++){
+ for (int k = 0; k < n; k++)
+ {
+ out[i+j+k+10000000] = out[i+j+k+10000000] + in[i+j+k+10000000];
+ }
+ }
+ }
+
+ for (int i = 0; i < l; i++){
+ for (int j = 0; j < m; j++){
+ for (int k = 0; k < n; k++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j + k + 10000, vl);
+ v = __riscv_vle8_v_i8mf8_tu (v, in + i + j + k + 20000, vl);
+ __riscv_vse8_v_i8mf8 (out + i + j + k + 10000, v, vl);
+ }
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+/* Test input: a single loop copying e8/mf8 vectors from in to out with the
+   constant AVL vl = 32.  The scan-assembler directives after the function
+   expect exactly one vsetvli, and one place where a local label is
+   immediately followed by a vle8.v with a zero offset.  */
+void f (void * restrict in, void * restrict out, int n)
+{
+ size_t vl = 32;
+ for (int i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {\.L[0-9]+\:\s+vle8\.v\s+v[0-9]+,\s*0\s*\([a-x0-9]+\)} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+/* Test input: a single loop whose body contains fourteen independent e8/mf8
+   load/store pairs at offsets i + 1 through i + 14, all with the same
+   constant AVL (vl = 32, declared as int here).  The l and m parameters
+   are never used.  The scan-assembler directives after the function expect
+   exactly one vsetvli.  */
+void f (void * restrict in, void * restrict out, int l, int n, int m)
+{
+ int vl = 32;
+ for (int i = 0; i < n; i++)
+ {
+ vint8mf8_t v1 = __riscv_vle8_v_i8mf8 (in + i + 1, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 1, v1, vl);
+
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8 (in + i + 2, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 2, v2, vl);
+
+ vint8mf8_t v3 = __riscv_vle8_v_i8mf8 (in + i + 3, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 3, v3, vl);
+
+ vint8mf8_t v4 = __riscv_vle8_v_i8mf8 (in + i + 4, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 4, v4, vl);
+
+ vint8mf8_t v5 = __riscv_vle8_v_i8mf8 (in + i + 5, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 5, v5, vl);
+
+ vint8mf8_t v6 = __riscv_vle8_v_i8mf8 (in + i + 6, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 6, v6, vl);
+
+ vint8mf8_t v7 = __riscv_vle8_v_i8mf8 (in + i + 7, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 7, v7, vl);
+
+ vint8mf8_t v8 = __riscv_vle8_v_i8mf8 (in + i + 8, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 8, v8, vl);
+
+ vint8mf8_t v9 = __riscv_vle8_v_i8mf8 (in + i + 9, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 9, v9, vl);
+
+ vint8mf8_t v10 = __riscv_vle8_v_i8mf8 (in + i + 10, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 10, v10, vl);
+
+ vint8mf8_t v11 = __riscv_vle8_v_i8mf8 (in + i + 11, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 11, v11, vl);
+
+ vint8mf8_t v12 = __riscv_vle8_v_i8mf8 (in + i + 12, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 12, v12, vl);
+
+ vint8mf8_t v13 = __riscv_vle8_v_i8mf8 (in + i + 13, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 13, v13, vl);
+
+ vint8mf8_t v14 = __riscv_vle8_v_i8mf8 (in + i + 14, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 14, v14, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */