change_insn (rinsn, new_pat);
}
-static void
-local_eliminate_vsetvl_insn (const vector_insn_info &dem)
-{
- const insn_info *insn = dem.get_insn ();
- if (!insn || insn->is_artificial ())
- return;
- rtx_insn *rinsn = insn->rtl ();
- const bb_info *bb = insn->bb ();
- if (vsetvl_insn_p (rinsn))
- {
- rtx vl = get_vl (rinsn);
- for (insn_info *i = insn->next_nondebug_insn ();
- real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ())
- {
- if (i->is_call () || i->is_asm ()
- || find_access (i->defs (), VL_REGNUM)
- || find_access (i->defs (), VTYPE_REGNUM))
- return;
-
- if (has_vtype_op (i->rtl ()))
- {
- if (!vsetvl_discard_result_insn_p (PREV_INSN (i->rtl ())))
- return;
- rtx avl = get_avl (i->rtl ());
- if (avl != vl)
- return;
- set_info *def = find_access (i->uses (), REGNO (avl))->def ();
- if (def->insn () != insn)
- return;
-
- vector_insn_info new_info;
- new_info.parse_insn (i);
- if (!new_info.skip_avl_compatible_p (dem))
- return;
-
- new_info.set_avl_info (dem.get_avl_info ());
- new_info = dem.merge (new_info, LOCAL_MERGE);
- change_vsetvl_insn (insn, new_info);
- eliminate_insn (PREV_INSN (i->rtl ()));
- return;
- }
- }
- }
-}
-
static bool
source_equal_p (insn_info *insn1, insn_info *insn2)
{
void pre_vsetvl (void);
/* Phase 5. */
+ void local_eliminate_vsetvl_insn (const vector_insn_info &) const;
void cleanup_insns (void) const;
/* Phase 6. */
commit_edge_insertions ();
}
+/* Local user vsetvl optimizaiton:
+
+ Case 1:
+ vsetvl a5,a4,e8,mf8
+ ...
+ vsetvl zero,a5,e8,mf8 --> Eliminate directly.
+
+ Case 2:
+ vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2
+ ...
+ vsetvl zero,a5,e32,mf2 --> Eliminate directly. */
+void
+pass_vsetvl::local_eliminate_vsetvl_insn (const vector_insn_info &dem) const
+{
+ const insn_info *insn = dem.get_insn ();
+ if (!insn || insn->is_artificial ())
+ return;
+ rtx_insn *rinsn = insn->rtl ();
+ const bb_info *bb = insn->bb ();
+ if (vsetvl_insn_p (rinsn))
+ {
+ rtx vl = get_vl (rinsn);
+ for (insn_info *i = insn->next_nondebug_insn ();
+ real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ())
+ {
+ if (i->is_call () || i->is_asm ()
+ || find_access (i->defs (), VL_REGNUM)
+ || find_access (i->defs (), VTYPE_REGNUM))
+ return;
+
+ if (has_vtype_op (i->rtl ()))
+ {
+ if (!vsetvl_discard_result_insn_p (PREV_INSN (i->rtl ())))
+ return;
+ rtx avl = get_avl (i->rtl ());
+ if (avl != vl)
+ return;
+ set_info *def = find_access (i->uses (), REGNO (avl))->def ();
+ if (def->insn () != insn)
+ return;
+
+ vector_insn_info new_info
+ = m_vector_manager->vector_insn_infos[i->uid ()];
+ if (!new_info.skip_avl_compatible_p (dem))
+ return;
+
+ new_info.set_avl_info (dem.get_avl_info ());
+ new_info = dem.merge (new_info, LOCAL_MERGE);
+ change_vsetvl_insn (insn, new_info);
+ eliminate_insn (PREV_INSN (i->rtl ()));
+ return;
+ }
+ }
+ }
+}
+
/* Before VSETVL PASS, RVV instructions pattern is depending on AVL operand
implicitly. Since we will emit VSETVL instruction and make RVV instructions
depending on VL/VTYPE global status registers, we remove the such AVL operand
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+int byte_mac_vec(unsigned char *a, unsigned char *b, int len) {
+ size_t vlmax = __riscv_vsetvlmax_e8m1();
+ vint32m4_t vec_s = __riscv_vmv_v_x_i32m4(0, vlmax);
+ vint32m1_t vec_zero = __riscv_vmv_v_x_i32m1(0, vlmax);
+ int k = len;
+
+ for (size_t vl; k > 0; k -= vl, a += vl, b += vl) {
+ vl = __riscv_vsetvl_e8m1(k);
+
+ vuint8m1_t a8s = __riscv_vle8_v_u8m1(a, vl);
+ vuint8m1_t b8s = __riscv_vle8_v_u8m1(b, vl);
+ vuint32m4_t a8s_extended = __riscv_vzext_vf4_u32m4(a8s, vl);
+ vuint32m4_t b8s_extended = __riscv_vzext_vf4_u32m4(a8s, vl);
+
+ vint32m4_t a8s_as_i32 = __riscv_vreinterpret_v_u32m4_i32m4(a8s_extended);
+ vint32m4_t b8s_as_i32 = __riscv_vreinterpret_v_u32m4_i32m4(b8s_extended);
+
+ vec_s = __riscv_vmacc_vv_i32m4_tu(vec_s, a8s_as_i32, b8s_as_i32, vl);
+ }
+
+ vint32m1_t vec_sum = __riscv_vredsum_vs_i32m4_i32m1(vec_s, vec_zero, __riscv_vsetvl_e32m4(len));
+ int sum = __riscv_vmv_x_s_i32m1_i32(vec_sum);
+
+ return sum;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m4,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m4,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m4,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 4 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */