]>
Commit | Line | Data |
---|---|---|
dbc7e6ae | 1 | /* { dg-do compile } */ |
2 | /* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ | |
3 | ||
4 | #include <stdint.h> | |
5 | ||
6 | #define add(A, B) ((A) + (B)) /* These operator macros are the OP arguments that the FOR_EACH_* lists below pass on to their T macro. */ | |
7 | #define sub(A, B) ((A) - (B)) | |
cdb4d5d0 | 8 | #define mul(A, B) ((A) * (B)) |
9 | #define div(A, B) ((A) / (B)) /* Defined, but not listed in either FOR_EACH_*_TYPE macro below. */ | |
dbc7e6ae | 10 | #define max(A, B) ((A) > (B) ? (A) : (B)) /* Expands A or B twice; DEF_LOOP only passes plain variables. */ |
11 | #define min(A, B) ((A) < (B) ? (A) : (B)) /* Expands A or B twice, like max. */ | |
12 | #define and(A, B) ((A) & (B)) | |
13 | #define ior(A, B) ((A) | (B)) | |
14 | #define xor(A, B) ((A) ^ (B)) | |
15 | ||
16 | #define N 121 | |
17 | ||
18 | #define DEF_LOOP(TYPE, CMPTYPE, OP) /* Defines f_<OP>_<TYPE>, which for each i < N stores OP (induc, src2v) to dest[i] when cond[i] < limit and elsev otherwise. */ \ | |
19 | void __attribute__((noipa)) /* noipa: GCC attribute that keeps interprocedural optimizations away from the function. */ \ | |
20 | f_##OP##_##TYPE (TYPE *restrict dest, CMPTYPE *restrict cond, \ | |
21 | CMPTYPE limit, TYPE src2v, TYPE elsev) \ | |
22 | { \ | |
23 | TYPE induc = 0; /* Counter held in TYPE, bumped by 1 per iteration; it is the first operand of OP. */ \ | |
24 | for (unsigned int i = 0; i < N; ++i, induc += 1) \ | |
25 | { \ | |
26 | TYPE truev = OP (induc, src2v); /* Written unconditionally in the source; only the store below selects between truev and elsev. */ \ | |
27 | dest[i] = cond[i] < limit ? truev : elsev; \ | |
28 | } \ | |
29 | } | |
30 | ||
31 | #define FOR_EACH_INT_TYPE(T, TYPE) /* Invokes T (TYPE, TYPE, op) for each of the eight integer operations; TYPE serves as both the data type and the comparison type. */ \ | |
32 | T (TYPE, TYPE, add) \ | |
33 | T (TYPE, TYPE, sub) \ | |
cdb4d5d0 | 34 | T (TYPE, TYPE, mul) \ |
dbc7e6ae | 35 | T (TYPE, TYPE, max) \ |
36 | T (TYPE, TYPE, min) \ | |
37 | T (TYPE, TYPE, and) \ | |
38 | T (TYPE, TYPE, ior) \ | |
39 | T (TYPE, TYPE, xor) | |
40 | ||
41 | #define FOR_EACH_FP_TYPE(T, TYPE, CMPTYPE, SUFFIX) /* Invokes T (TYPE, CMPTYPE, op) for add, sub, mul and for __builtin_fmax/__builtin_fmin with SUFFIX pasted onto the builtin name. */ \ | |
42 | T (TYPE, CMPTYPE, add) \ | |
43 | T (TYPE, CMPTYPE, sub) \ | |
cdb4d5d0 | 44 | T (TYPE, CMPTYPE, mul) \ |
45 | /* No div because that gets converted into a mul anyway. */ \ | |
dbc7e6ae | 46 | T (TYPE, CMPTYPE, __builtin_fmax##SUFFIX) \ |
47 | T (TYPE, CMPTYPE, __builtin_fmin##SUFFIX) | |
48 | ||
49 | #define FOR_EACH_LOOP(T) /* Applies T to every integer operation for the eight fixed-width integer types, then to every FP operation for _Float16, float and double. */ \ | |
50 | FOR_EACH_INT_TYPE (T, int8_t) \ | |
51 | FOR_EACH_INT_TYPE (T, int16_t) \ | |
52 | FOR_EACH_INT_TYPE (T, int32_t) \ | |
53 | FOR_EACH_INT_TYPE (T, int64_t) \ | |
54 | FOR_EACH_INT_TYPE (T, uint8_t) \ | |
55 | FOR_EACH_INT_TYPE (T, uint16_t) \ | |
56 | FOR_EACH_INT_TYPE (T, uint32_t) \ | |
57 | FOR_EACH_INT_TYPE (T, uint64_t) \ | |
58 | FOR_EACH_FP_TYPE (T, _Float16, uint16_t, f16) /* The _Float16 loops take uint16_t for cond and limit; float and double compare in their own type. */ \ | |
59 | FOR_EACH_FP_TYPE (T, float, float, f32) \ | |
60 | FOR_EACH_FP_TYPE (T, double, double, f64) | |
61 | ||
62 | FOR_EACH_LOOP (DEF_LOOP) /* Emits 79 functions: 8 integer types x 8 operations plus 3 FP types x 5 operations. */ | |
63 | ||
64 | /* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.., z[0-9]+} } } */ | |
65 | ||
cdb4d5d0 | 66 | /* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b,} 16 } } */ |
67 | /* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h,} 21 } } */ | |
68 | /* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s,} 21 } } */ | |
69 | /* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d,} 21 } } */ | |
dbc7e6ae | 70 | |
71 | /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ | |
72 | /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ | |
73 | /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ | |
74 | /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ | |
75 | ||
76 | /* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ | |
77 | /* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ | |
78 | /* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ | |
79 | /* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ | |
80 | ||
cdb4d5d0 | 81 | /* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ |
82 | /* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ | |
83 | /* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ | |
84 | /* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ | |
85 | ||
dbc7e6ae | 86 | /* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ |
87 | /* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ | |
88 | /* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ | |
89 | /* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ | |
90 | ||
91 | /* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ | |
92 | /* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ | |
93 | /* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ | |
94 | /* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ | |
95 | ||
96 | /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ | |
97 | /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ | |
98 | /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ | |
99 | /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ | |
100 | ||
101 | /* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ | |
102 | /* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ | |
103 | /* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ | |
104 | /* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ | |
105 | ||
106 | /* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ | |
107 | /* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ | |
108 | /* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ | |
109 | /* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ | |
110 | ||
111 | /* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ | |
112 | /* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ | |
113 | /* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ | |
114 | /* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ | |
115 | ||
116 | /* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ | |
117 | /* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ | |
118 | /* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ | |
119 | /* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ | |
120 | ||
121 | /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ | |
122 | /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ | |
123 | /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ | |
124 | ||
125 | /* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ | |
126 | /* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ | |
127 | /* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ | |
128 | ||
cdb4d5d0 | 129 | /* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ |
130 | /* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ | |
131 | /* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ | |
132 | ||
dbc7e6ae | 133 | /* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ |
134 | /* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ | |
135 | /* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ | |
136 | ||
137 | /* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ | |
138 | /* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ | |
139 | /* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ |