RISC-V: Adjust LMUL when using maximum SEW [PR117955].

author Robin Dapp <rdapp@ventanamicro.com>

Tue, 25 Feb 2025 11:55:08 +0000 (12:55 +0100)

committer Robin Dapp <rdapp@ventanamicro.com>

Thu, 13 Mar 2025 09:29:44 +0000 (10:29 +0100)
author Robin Dapp <rdapp@ventanamicro.com>
Tue, 25 Feb 2025 11:55:08 +0000 (12:55 +0100)
committer Robin Dapp <rdapp@ventanamicro.com>
Thu, 13 Mar 2025 09:29:44 +0000 (10:29 +0100)
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc

index 82284624a24238f6ae06f152589dbabd19561a01..f0165f7b8c8ca25f25060b2718d9eb6f3774a215 100644 (file)
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1729,9 +1729,11 @@ private:
    }
    inline void use_max_sew (vsetvl_info &prev, const vsetvl_info &next)
    {
-    int max_sew = MAX (prev.get_sew (), next.get_sew ());
-    prev.set_sew (max_sew);
-    prev.set_ratio (calculate_ratio (prev.get_sew (), prev.get_vlmul ()));
+    bool prev_sew_larger = prev.get_sew () >= next.get_sew ();
+    const vsetvl_info from = prev_sew_larger ? prev : next;
+    prev.set_sew (from.get_sew ());
+    prev.set_vlmul (from.get_vlmul ());
+    prev.set_ratio (from.get_ratio ());
      use_min_of_max_sew (prev, next);
    }
    inline void use_next_sew_lmul (vsetvl_info &prev, const vsetvl_info &next)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10-2.c

new file mode 100644 (file)

index 0000000..fe3a1ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10-2.c
@@ -0,0 +1,93 @@
+/* { dg-do run { target { rv64 } } } */
+/* { dg-require-effective-target rv64 } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-require-effective-target riscv_zvfh_ok } */
+/* { dg-options " -march=rv64gcv_zvfh -mabi=lp64d -O2" } */
+
+#include <riscv_vector.h>
+
+int8_t a[1];
+uint16_t b[1];
+float c[1], n[1];
+uint16_t d[1];
+uint8_t e[1];
+uint16_t f[1];
+_Float16 g[1], k[1], m[1], p[1];
+uint16_t i[1];
+int8_t j[1];
+uint8_t o[1];
+uint32_t l[1];
+uint16_t q[1];
+uint32_t r[1];
+uint32_t s[1];
+int16_t t[1];
+int main()
+{
+  int u = 25;
+  int8_t *v = a;
+  uint32_t *w;
+  uint16_t *aa = b;
+  float *ab = c, *as = n;
+  uint32_t *ad;
+  uint16_t *ah = f;
+  _Float16 *ai = g, *aj = k, *an = m, *au = p;
+  int32_t *ak;
+  int16_t *al;
+  uint16_t *am = i;
+  int8_t *ao = j;
+  uint8_t *ap = o;
+  uint32_t *aq = l;
+  uint16_t *ar = q;
+  uint32_t *at = r;
+  uint32_t *av = s;
+  int32_t *ax;
+  int16_t *ay = t;
+  for (size_t az; u; u -= az)
+  {
+    az = __riscv_vsetvl_e32m8(u);
+    vint8m2_t ba = __riscv_vle8_v_i8m2(v, az);
+    vbool4_t bb = __riscv_vmseq_vx_i8m2_b4(ba, 1, az);
+    vuint16m4_t bc = __riscv_vsll_vx_u16m4(__riscv_vid_v_u16m4(az), 2, az);
+    vuint32m8_t bd = __riscv_vsll_vx_u32m8(__riscv_vid_v_u32m8(az), 1, az);
+    vuint32m8_t be = __riscv_vluxei16_v_u32m8_m(bb, w, bc, az);
+    vuint16m4_t bf;
+    __riscv_vsuxei16_v_u32m8_m(bb, aq, bf, be, az);
+    vuint8m2_t bg = __riscv_vsll_vx_u8m2(__riscv_vid_v_u8m2(az), 1, az);
+    vuint16m4_t bh = __riscv_vloxei8_v_u16m4(aa, bg, az);
+    vfloat16m4_t bi;
+    vuint16m4_t bj = __riscv_vsll_vx_u16m4(__riscv_vid_v_u16m4(az), 1, az);
+    vint16m4_t bk = __riscv_vloxei32_v_i16m4_m(bb, al, bd, az);
+    __riscv_vsse16_v_u16m4(ar, 2, bh, az);
+    vuint16m4_t bl = __riscv_vloxei16_v_u16m4(d, bj, az);
+    vfloat16m4_t bm = __riscv_vle16_v_f16m4(ai, az);
+    vuint16m4_t bn = __riscv_vlse16_v_u16m4(ah, 2, az);
+    vint32m8_t bo = __riscv_vle32_v_i32m8_m(bb, ak, az);
+    vfloat16m1_t bp = __riscv_vle16_v_f16m1(aj, az);
+    vuint16m4_t bq = __riscv_vrgatherei16_vv_u16m4(bl, bn, az);
+    __riscv_vse16_v_u16m4(am, bq, az);
+    vfloat16m1_t br = __riscv_vfredusum_vs_f16m4_f16m1_m(bb, bm, bp, az);
+    vuint8m2_t bs;
+    vuint32m8_t bt;
+    __riscv_vse16_v_f16m1(an, br, az);
+    vfloat32m8_t bu = __riscv_vloxei8_v_f32m8(ab, bs, az);
+    __riscv_vse16_v_i16m4(ay, bk, az);
+    bi = __riscv_vfmv_s_f_f16m4(1, az);
+    __riscv_vse16_v_f16m4(au, bi, az);
+    vuint16m4_t bw = __riscv_vsll_vx_u16m4(__riscv_vid_v_u16m4(az), 0, az);
+    vuint32m8_t by = __riscv_vle32_v_u32m8_m(bb, ad, az);
+    bt = __riscv_vmv_s_x_u32m8(3090659, az);
+    __riscv_vse32_v_u32m8(at, bt, az);
+    vuint8m2_t bz = __riscv_vloxei16_v_u8m2(e, bw, az);
+    __riscv_vse32_v_u32m8(av, by, az);
+    vint8m2_t cd;
+    __riscv_vse8_v_i8m2(ao, cd, az);
+    __riscv_vsse32_v_i32m8_m(bb, ax, 4, bo, az);
+    __riscv_vse32_v_f32m8(as, bu, az);
+    vuint16m4_t cf;
+    __riscv_vsoxei16_v_u32m8(aq, cf, be, az);
+    vuint8m2_t cg = __riscv_vmulhu_vx_u8m2(bz, 0, az);
+    vuint32m8_t ch = __riscv_vsll_vx_u32m8(__riscv_vid_v_u32m8(az), 0, az);
+    __riscv_vsoxei32_v_u8m2(ap, ch, cg, az);
+  }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10.c b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10.c

index af3a8610d63ff919828c4577ce58df9160c8240d..60fdfc419e6a2a5908a009ac90ac20345d9586ca 100644 (file)
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10.c
@@ -1,14 +1,41 @@
-/* { dg-do compile { target { rv64 } } } */
+/* { dg-do run { target { rv64 } } } */
+/* { dg-require-effective-target rv64 } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-require-effective-target riscv_zvfh_ok } */
  /* { dg-options " -march=rv64gcv_zvfh -mabi=lp64d -O2 --param=vsetvl-strategy=optim -fno-schedule-insns  -fno-schedule-insns2 -fno-schedule-fusion " } */
  
  #include <riscv_vector.h>
  
  void
-foo (uint8_t *ptr, vfloat16m4_t *v1, vuint32m8_t *v2, vuint8m2_t *v3, size_t vl)
+__attribute__ ((noipa))
+foo (vfloat16m4_t *v1, vuint32m8_t *v2, vuint8m2_t *v3, size_t vl)
  {
    *v1 = __riscv_vfmv_s_f_f16m4 (1, vl);
    *v2 = __riscv_vmv_s_x_u32m8 (2963090659u, vl);
    *v3 = __riscv_vsll_vx_u8m2 (__riscv_vid_v_u8m2 (vl), 2, vl);
  }
  
-/* { dg-final { scan-assembler-not {vsetvli.*zero,zero} } }*/
+int
+main ()
+{
+  vfloat16m4_t v1;
+  vuint32m8_t v2;
+  vuint8m2_t v3;
+  int vl = 4;
+  foo (&v1, &v2, &v3, vl);
+
+  _Float16 val1 = ((_Float16 *)&v1)[0];
+  if (val1 - 1.0000f > 0.00001f)
+    __builtin_abort ();
+
+  uint32_t val2 = ((uint32_t *)&v2)[0];
+  if (val2 != 2963090659u)
+    __builtin_abort ();
+
+  for (int i = 0; i < vl; i++)
+    {
+      uint8_t val = ((uint8_t *)&v3)[i];
+      if (val != i << 2)
+        __builtin_abort ();
+    }
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr117955.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr117955.c

new file mode 100644 (file)

index 0000000..81e3a6e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr117955.c
@@ -0,0 +1,26 @@
+/* { dg-do compile { target { rv64 } } } */
+/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3" } */
+
+#include <riscv_vector.h>
+
+_Float16 a (uint64_t);
+int8_t b () {
+  int c = 100;
+  double *d;
+  _Float16 *e;
+  for (size_t f;; c -= f)
+    {
+      f = c;
+      __riscv_vsll_vx_u8mf8 (__riscv_vid_v_u8mf8 (f), 2, f);
+      vfloat16mf4_t g;
+      a (1);
+      g = __riscv_vfmv_s_f_f16mf4 (2, f);
+      vfloat64m1_t i = __riscv_vfmv_s_f_f64m1 (30491, f);
+      vuint16mf4_t j;
+      __riscv_vsoxei16_v_f16mf4 (e, j, g, f);
+      vuint8mf8_t k = __riscv_vsll_vx_u8mf8 (__riscv_vid_v_u8mf8 (f), 3, f);
+      __riscv_vsoxei8_v_f64m1 (d, k, i, f);
+    }
+}
+
+/* { dg-final { scan-assembler-not "e64,mf4" } } */
author	Robin Dapp <rdapp@ventanamicro.com>
	Tue, 25 Feb 2025 11:55:08 +0000 (12:55 +0100)
committer	Robin Dapp <rdapp@ventanamicro.com>
	Thu, 13 Mar 2025 09:29:44 +0000 (10:29 +0100)
gcc/config/riscv/riscv-vsetvl.cc		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/riscv/rvv/base/bug-10-2.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/riscv/rvv/base/bug-10.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/riscv/rvv/base/pr117955.c	[new file with mode: 0644]	patch \| blob