/* RISC-V RVV register-pressure testcase: the dg-final directives below
   require that no whole-register moves (vmv1r/vmv2r/vmv4r/vmv8r) and no
   csrr instructions appear in the generated assembly.  */
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */

#include "riscv_vector.h"

/* Add the sixteen arguments left to right and return the total.  Kept
   out of line (noinline) so every operand must be materialized at the
   call site rather than folded away.  */
double __attribute__ ((noinline))
sumation (double sum0, double sum1, double sum2, double sum3, double sum4,
	  double sum5, double sum6, double sum7, double sum8, double sum9,
	  double sum10, double sum11, double sum12, double sum13, double sum14,
	  double sum15)
{
  /* Sequential accumulation: identical association order to a single
     left-to-right chain of '+', so the FP result is bit-identical.  */
  double total = sum0;
  total += sum1;
  total += sum2;
  total += sum3;
  total += sum4;
  total += sum5;
  total += sum6;
  total += sum7;
  total += sum8;
  total += sum9;
  total += sum10;
  total += sum11;
  total += sum12;
  total += sum13;
  total += sum14;
  total += sum15;
  return total;
}
15 | ||
/* Keep sixteen vector values live at once: load 16 f32 m1 vectors,
   widen-convert all of them to i64 m2 with round-toward-zero (RTZ),
   extract lane 0 of each, and pass the lanes to the out-of-line
   sumation.  The "nop" asm statements carry a memory clobber, forcing
   each phase to finish before the next starts; presumably this is meant
   to maximize vector register pressure -- consistent with the
   scan-assembler-not checks for vmv<N>r/csrr at the end of the file
   (TODO confirm intent).  */
double
foo (char const *buf, size_t len)
{
  double sum = 0;
  /* VLMAX computed for e8/m8; the e32 loads below reuse this vl value
     unchanged -- NOTE(review): deliberate for this test, verify.  */
  size_t vl = __riscv_vsetvlmax_e8m8 ();
  size_t step = vl * 4;
  const char *it = buf, *end = buf + len;
  /* NOTE(review): the guard only ensures step = 4*vl bytes remain, but
     the body advances it by 16*vl bytes; harmless here because this is
     a compile-only test (dg-do compile) that is never executed.  */
  for (; it + step <= end;)
    {
      /* Phase 1: sixteen unit-stride 32-bit loads, advancing the cursor
	 by vl bytes after each.  */
      vfloat32m1_t v0 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v1 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v2 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v3 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v4 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v5 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v6 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v7 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v8 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v9 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v10 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v11 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v12 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v13 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v14 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v15 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;

      /* Phase 2: barrier, then widen every f32 m1 vector to i64 m2
	 using the truncating (RTZ) float-to-int conversion.  All of
	 v0..v15 must still be live across this barrier.  */
      asm volatile("nop" ::: "memory");
      vint64m2_t vw0 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v0, vl);
      vint64m2_t vw1 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v1, vl);
      vint64m2_t vw2 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v2, vl);
      vint64m2_t vw3 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v3, vl);
      vint64m2_t vw4 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v4, vl);
      vint64m2_t vw5 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v5, vl);
      vint64m2_t vw6 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v6, vl);
      vint64m2_t vw7 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v7, vl);
      vint64m2_t vw8 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v8, vl);
      vint64m2_t vw9 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v9, vl);
      vint64m2_t vw10 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v10, vl);
      vint64m2_t vw11 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v11, vl);
      vint64m2_t vw12 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v12, vl);
      vint64m2_t vw13 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v13, vl);
      vint64m2_t vw14 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v14, vl);
      vint64m2_t vw15 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v15, vl);

      /* Phase 3: barrier, then read scalar element 0 of each i64 m2
	 vector (vmv.x.s); the int64_t result is implicitly converted
	 to double by the initializer.  */
      asm volatile("nop" ::: "memory");
      double sum0 = __riscv_vmv_x_s_i64m2_i64 (vw0);
      double sum1 = __riscv_vmv_x_s_i64m2_i64 (vw1);
      double sum2 = __riscv_vmv_x_s_i64m2_i64 (vw2);
      double sum3 = __riscv_vmv_x_s_i64m2_i64 (vw3);
      double sum4 = __riscv_vmv_x_s_i64m2_i64 (vw4);
      double sum5 = __riscv_vmv_x_s_i64m2_i64 (vw5);
      double sum6 = __riscv_vmv_x_s_i64m2_i64 (vw6);
      double sum7 = __riscv_vmv_x_s_i64m2_i64 (vw7);
      double sum8 = __riscv_vmv_x_s_i64m2_i64 (vw8);
      double sum9 = __riscv_vmv_x_s_i64m2_i64 (vw9);
      double sum10 = __riscv_vmv_x_s_i64m2_i64 (vw10);
      double sum11 = __riscv_vmv_x_s_i64m2_i64 (vw11);
      double sum12 = __riscv_vmv_x_s_i64m2_i64 (vw12);
      double sum13 = __riscv_vmv_x_s_i64m2_i64 (vw13);
      double sum14 = __riscv_vmv_x_s_i64m2_i64 (vw14);
      double sum15 = __riscv_vmv_x_s_i64m2_i64 (vw15);

      /* Out-of-line call keeps all 16 scalars live simultaneously.  */
      sum += sumation (sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8,
		       sum9, sum10, sum11, sum12, sum13, sum14, sum15);
    }
  return sum;
}
99 | ||
/* Same structure as foo above, but the widening conversions use the
   dynamic-rounding-mode variant (vfwcvt.x, no _rtz suffix) instead of
   the truncating one, exercising the other conversion pattern under the
   same register-pressure conditions.  */
double
foo2 (char const *buf, size_t len)
{
  double sum = 0;
  /* VLMAX computed for e8/m8; reused unchanged by the e32 loads --
     NOTE(review): deliberate for this test, verify.  */
  size_t vl = __riscv_vsetvlmax_e8m8 ();
  size_t step = vl * 4;
  const char *it = buf, *end = buf + len;
  /* NOTE(review): guard checks 4*vl bytes but the body consumes 16*vl;
     harmless, the test is compile-only.  */
  for (; it + step <= end;)
    {
      /* Phase 1: sixteen unit-stride 32-bit loads.  */
      vfloat32m1_t v0 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v1 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v2 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v3 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v4 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v5 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v6 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v7 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v8 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v9 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v10 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v11 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v12 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v13 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v14 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v15 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;

      /* Phase 2: barrier, then widen every f32 m1 vector to i64 m2
	 with the rounding (non-RTZ) conversion; v0..v15 all stay live
	 across the barrier.  */
      asm volatile("nop" ::: "memory");
      vint64m2_t vw0 = __riscv_vfwcvt_x_f_v_i64m2 (v0, vl);
      vint64m2_t vw1 = __riscv_vfwcvt_x_f_v_i64m2 (v1, vl);
      vint64m2_t vw2 = __riscv_vfwcvt_x_f_v_i64m2 (v2, vl);
      vint64m2_t vw3 = __riscv_vfwcvt_x_f_v_i64m2 (v3, vl);
      vint64m2_t vw4 = __riscv_vfwcvt_x_f_v_i64m2 (v4, vl);
      vint64m2_t vw5 = __riscv_vfwcvt_x_f_v_i64m2 (v5, vl);
      vint64m2_t vw6 = __riscv_vfwcvt_x_f_v_i64m2 (v6, vl);
      vint64m2_t vw7 = __riscv_vfwcvt_x_f_v_i64m2 (v7, vl);
      vint64m2_t vw8 = __riscv_vfwcvt_x_f_v_i64m2 (v8, vl);
      vint64m2_t vw9 = __riscv_vfwcvt_x_f_v_i64m2 (v9, vl);
      vint64m2_t vw10 = __riscv_vfwcvt_x_f_v_i64m2 (v10, vl);
      vint64m2_t vw11 = __riscv_vfwcvt_x_f_v_i64m2 (v11, vl);
      vint64m2_t vw12 = __riscv_vfwcvt_x_f_v_i64m2 (v12, vl);
      vint64m2_t vw13 = __riscv_vfwcvt_x_f_v_i64m2 (v13, vl);
      vint64m2_t vw14 = __riscv_vfwcvt_x_f_v_i64m2 (v14, vl);
      vint64m2_t vw15 = __riscv_vfwcvt_x_f_v_i64m2 (v15, vl);

      /* Phase 3: barrier, then extract scalar element 0 of each i64 m2
	 vector; the int64_t is implicitly converted to double.  */
      asm volatile("nop" ::: "memory");
      double sum0 = __riscv_vmv_x_s_i64m2_i64 (vw0);
      double sum1 = __riscv_vmv_x_s_i64m2_i64 (vw1);
      double sum2 = __riscv_vmv_x_s_i64m2_i64 (vw2);
      double sum3 = __riscv_vmv_x_s_i64m2_i64 (vw3);
      double sum4 = __riscv_vmv_x_s_i64m2_i64 (vw4);
      double sum5 = __riscv_vmv_x_s_i64m2_i64 (vw5);
      double sum6 = __riscv_vmv_x_s_i64m2_i64 (vw6);
      double sum7 = __riscv_vmv_x_s_i64m2_i64 (vw7);
      double sum8 = __riscv_vmv_x_s_i64m2_i64 (vw8);
      double sum9 = __riscv_vmv_x_s_i64m2_i64 (vw9);
      double sum10 = __riscv_vmv_x_s_i64m2_i64 (vw10);
      double sum11 = __riscv_vmv_x_s_i64m2_i64 (vw11);
      double sum12 = __riscv_vmv_x_s_i64m2_i64 (vw12);
      double sum13 = __riscv_vmv_x_s_i64m2_i64 (vw13);
      double sum14 = __riscv_vmv_x_s_i64m2_i64 (vw14);
      double sum15 = __riscv_vmv_x_s_i64m2_i64 (vw15);

      /* Out-of-line call keeps all 16 scalars live simultaneously.  */
      sum += sumation (sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8,
		       sum9, sum10, sum11, sum12, sum13, sum14, sum15);
    }
  return sum;
}
183 | ||
/* { dg-final { scan-assembler-not {vmv1r} } } */
/* { dg-final { scan-assembler-not {vmv2r} } } */
/* { dg-final { scan-assembler-not {vmv4r} } } */
/* { dg-final { scan-assembler-not {vmv8r} } } */
/* { dg-final { scan-assembler-not {csrr} } } */