/* RISC-V RVV register-pressure testcase: the dg-final directives below
   require that no whole-register moves (vmv1r/vmv2r/vmv4r/vmv8r) and no
   csrr instructions appear in the generated assembly.  */
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */

#include "riscv_vector.h"

/* Add the sixteen arguments left to right and return the total.  Kept
   out of line (noinline) so every operand must be materialized at the
   call site rather than folded away.  */
double __attribute__ ((noinline))
sumation (double sum0, double sum1, double sum2, double sum3, double sum4,
	  double sum5, double sum6, double sum7, double sum8, double sum9,
	  double sum10, double sum11, double sum12, double sum13, double sum14,
	  double sum15)
{
  /* Sequential accumulation: identical association order to a single
     left-to-right chain of '+', so the FP result is bit-identical.  */
  double total = sum0;
  total += sum1;
  total += sum2;
  total += sum3;
  total += sum4;
  total += sum5;
  total += sum6;
  total += sum7;
  total += sum8;
  total += sum9;
  total += sum10;
  total += sum11;
  total += sum12;
  total += sum13;
  total += sum14;
  total += sum15;
  return total;
}
15 | ||
/* Keep sixteen vector values live at once: load 16 f32 m1 vectors,
   widen-convert all of them to i64 m2 with round-toward-zero (RTZ),
   extract lane 0 of each, and pass the lanes to the out-of-line
   sumation.  The "nop" asm statements carry a memory clobber, forcing
   each phase to finish before the next starts; presumably this is meant
   to maximize vector register pressure -- consistent with the
   scan-assembler-not checks for vmv<N>r/csrr at the end of the file
   (TODO confirm intent).  */
double
foo (char const *buf, size_t len)
{
  double sum = 0;
  /* VLMAX computed for e8/m8; the e32 loads below reuse this vl value
     unchanged -- NOTE(review): deliberate for this test, verify.  */
  size_t vl = __riscv_vsetvlmax_e8m8 ();
  size_t step = vl * 4;
  const char *it = buf, *end = buf + len;
  /* NOTE(review): the guard only ensures step = 4*vl bytes remain, but
     the body advances it by 16*vl bytes; harmless here because this is
     a compile-only test (dg-do compile) that is never executed.  */
  for (; it + step <= end;)
    {
      /* Phase 1: sixteen unit-stride 32-bit loads, advancing the cursor
	 by vl bytes after each.  */
      vfloat32m1_t v0 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v1 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v2 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v3 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v4 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v5 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v6 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v7 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v8 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v9 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v10 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v11 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v12 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v13 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v14 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v15 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;

      /* Phase 2: barrier, then widen every f32 m1 vector to i64 m2
	 using the truncating (RTZ) float-to-int conversion.  All of
	 v0..v15 must still be live across this barrier.  */
      asm volatile("nop" ::: "memory");
      vint64m2_t vw0 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v0, vl);
      vint64m2_t vw1 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v1, vl);
      vint64m2_t vw2 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v2, vl);
      vint64m2_t vw3 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v3, vl);
      vint64m2_t vw4 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v4, vl);
      vint64m2_t vw5 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v5, vl);
      vint64m2_t vw6 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v6, vl);
      vint64m2_t vw7 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v7, vl);
      vint64m2_t vw8 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v8, vl);
      vint64m2_t vw9 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v9, vl);
      vint64m2_t vw10 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v10, vl);
      vint64m2_t vw11 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v11, vl);
      vint64m2_t vw12 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v12, vl);
      vint64m2_t vw13 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v13, vl);
      vint64m2_t vw14 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v14, vl);
      vint64m2_t vw15 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v15, vl);

      /* Phase 3: barrier, then read scalar element 0 of each i64 m2
	 vector (vmv.x.s); the int64_t result is implicitly converted
	 to double by the initializer.  */
      asm volatile("nop" ::: "memory");
      double sum0 = __riscv_vmv_x_s_i64m2_i64 (vw0);
      double sum1 = __riscv_vmv_x_s_i64m2_i64 (vw1);
      double sum2 = __riscv_vmv_x_s_i64m2_i64 (vw2);
      double sum3 = __riscv_vmv_x_s_i64m2_i64 (vw3);
      double sum4 = __riscv_vmv_x_s_i64m2_i64 (vw4);
      double sum5 = __riscv_vmv_x_s_i64m2_i64 (vw5);
      double sum6 = __riscv_vmv_x_s_i64m2_i64 (vw6);
      double sum7 = __riscv_vmv_x_s_i64m2_i64 (vw7);
      double sum8 = __riscv_vmv_x_s_i64m2_i64 (vw8);
      double sum9 = __riscv_vmv_x_s_i64m2_i64 (vw9);
      double sum10 = __riscv_vmv_x_s_i64m2_i64 (vw10);
      double sum11 = __riscv_vmv_x_s_i64m2_i64 (vw11);
      double sum12 = __riscv_vmv_x_s_i64m2_i64 (vw12);
      double sum13 = __riscv_vmv_x_s_i64m2_i64 (vw13);
      double sum14 = __riscv_vmv_x_s_i64m2_i64 (vw14);
      double sum15 = __riscv_vmv_x_s_i64m2_i64 (vw15);

      /* Out-of-line call keeps all 16 scalars live simultaneously.  */
      sum += sumation (sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8,
		       sum9, sum10, sum11, sum12, sum13, sum14, sum15);
    }
  return sum;
}
99 | ||
/* Same structure as foo above, but the widening conversions use the
   dynamic-rounding-mode variant (vfwcvt.x, no _rtz suffix) instead of
   the truncating one, exercising the other conversion pattern under the
   same register-pressure conditions.  */
double
foo2 (char const *buf, size_t len)
{
  double sum = 0;
  /* VLMAX computed for e8/m8; reused unchanged by the e32 loads --
     NOTE(review): deliberate for this test, verify.  */
  size_t vl = __riscv_vsetvlmax_e8m8 ();
  size_t step = vl * 4;
  const char *it = buf, *end = buf + len;
  /* NOTE(review): guard checks 4*vl bytes but the body consumes 16*vl;
     harmless, the test is compile-only.  */
  for (; it + step <= end;)
    {
      /* Phase 1: sixteen unit-stride 32-bit loads.  */
      vfloat32m1_t v0 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v1 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v2 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v3 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v4 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v5 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v6 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v7 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v8 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v9 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v10 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v11 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v12 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v13 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v14 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v15 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;

      /* Phase 2: barrier, then widen every f32 m1 vector to i64 m2
	 with the rounding (non-RTZ) conversion; v0..v15 all stay live
	 across the barrier.  */
      asm volatile("nop" ::: "memory");
      vint64m2_t vw0 = __riscv_vfwcvt_x_f_v_i64m2 (v0, vl);
      vint64m2_t vw1 = __riscv_vfwcvt_x_f_v_i64m2 (v1, vl);
      vint64m2_t vw2 = __riscv_vfwcvt_x_f_v_i64m2 (v2, vl);
      vint64m2_t vw3 = __riscv_vfwcvt_x_f_v_i64m2 (v3, vl);
      vint64m2_t vw4 = __riscv_vfwcvt_x_f_v_i64m2 (v4, vl);
      vint64m2_t vw5 = __riscv_vfwcvt_x_f_v_i64m2 (v5, vl);
      vint64m2_t vw6 = __riscv_vfwcvt_x_f_v_i64m2 (v6, vl);
      vint64m2_t vw7 = __riscv_vfwcvt_x_f_v_i64m2 (v7, vl);
      vint64m2_t vw8 = __riscv_vfwcvt_x_f_v_i64m2 (v8, vl);
      vint64m2_t vw9 = __riscv_vfwcvt_x_f_v_i64m2 (v9, vl);
      vint64m2_t vw10 = __riscv_vfwcvt_x_f_v_i64m2 (v10, vl);
      vint64m2_t vw11 = __riscv_vfwcvt_x_f_v_i64m2 (v11, vl);
      vint64m2_t vw12 = __riscv_vfwcvt_x_f_v_i64m2 (v12, vl);
      vint64m2_t vw13 = __riscv_vfwcvt_x_f_v_i64m2 (v13, vl);
      vint64m2_t vw14 = __riscv_vfwcvt_x_f_v_i64m2 (v14, vl);
      vint64m2_t vw15 = __riscv_vfwcvt_x_f_v_i64m2 (v15, vl);

      /* Phase 3: barrier, then extract scalar element 0 of each i64 m2
	 vector; the int64_t is implicitly converted to double.  */
      asm volatile("nop" ::: "memory");
      double sum0 = __riscv_vmv_x_s_i64m2_i64 (vw0);
      double sum1 = __riscv_vmv_x_s_i64m2_i64 (vw1);
      double sum2 = __riscv_vmv_x_s_i64m2_i64 (vw2);
      double sum3 = __riscv_vmv_x_s_i64m2_i64 (vw3);
      double sum4 = __riscv_vmv_x_s_i64m2_i64 (vw4);
      double sum5 = __riscv_vmv_x_s_i64m2_i64 (vw5);
      double sum6 = __riscv_vmv_x_s_i64m2_i64 (vw6);
      double sum7 = __riscv_vmv_x_s_i64m2_i64 (vw7);
      double sum8 = __riscv_vmv_x_s_i64m2_i64 (vw8);
      double sum9 = __riscv_vmv_x_s_i64m2_i64 (vw9);
      double sum10 = __riscv_vmv_x_s_i64m2_i64 (vw10);
      double sum11 = __riscv_vmv_x_s_i64m2_i64 (vw11);
      double sum12 = __riscv_vmv_x_s_i64m2_i64 (vw12);
      double sum13 = __riscv_vmv_x_s_i64m2_i64 (vw13);
      double sum14 = __riscv_vmv_x_s_i64m2_i64 (vw14);
      double sum15 = __riscv_vmv_x_s_i64m2_i64 (vw15);

      /* Out-of-line call keeps all 16 scalars live simultaneously.  */
      sum += sumation (sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8,
		       sum9, sum10, sum11, sum12, sum13, sum14, sum15);
    }
  return sum;
}
183 | ||
/* { dg-final { scan-assembler-not {vmv1r} } } */
/* { dg-final { scan-assembler-not {vmv2r} } } */
/* { dg-final { scan-assembler-not {vmv4r} } } */
/* { dg-final { scan-assembler-not {vmv8r} } } */
/* { dg-final { scan-assembler-not {csrr} } } */