]>
Commit | Line | Data |
---|---|---|
a0515226 | 1 | /* { dg-require-effective-target vect_int } */ |
2 | ||
3 | #include <stdarg.h> | |
a0515226 | 4 | #include "tree-vect.h" |
5 | ||
6 | #define M00 100 /* Mrc: coefficient used by foo for output r (0..2) of each group, applied to input c (0 = a, 1 = b, 2 = c) */ | |
7 | #define M10 216 | |
8 | #define M20 23 | |
9 | #define M01 1322 | |
10 | #define M11 13 | |
11 | #define M21 27271 | |
12 | #define M02 74 | |
13 | #define M12 191 | |
14 | #define M22 500 | |
15 | ||
16 | #define K00 405 /* multiplier applied to each pInput2 element in foo */ | |
17 | #define K10 112 /* K10, K01 and K11 are not referenced in the visible code */ | |
18 | #define K01 4322 | |
19 | #define K11 135 | |
20 | ||
21 | #define N 16 /* length of every array in main; foo runs N / 3 iterations */ | |
22 | ||
23 | /* SLP with load permutation and loop-based vectorization. Each iteration reads three consecutive ints from pInput and one int from pInput2, writes three M-weighted sums of the triple to pOutput and K00 times the single value to pOutput2. All four pointers are __restrict__-qualified. */ | |
24 | void foo (int *__restrict__ pInput, int *__restrict__ pOutput, | |
25 | int *__restrict__ pInput2, int *__restrict__ pOutput2) | |
26 | { | |
27 | int i, a, b, c, d; | |
28 | ||
29 | for (i = 0; i < N / 3; i++) /* integer division: only whole groups of three are processed */ | |
30 | { | |
31 | a = *pInput++; /* a, b, c: the next three consecutive pInput elements */ | |
32 | b = *pInput++; | |
33 | c = *pInput++; | |
34 | d = *pInput2++; /* one pInput2 element per iteration */ | |
35 | ||
36 | *pOutput++ = M00 * a + M01 * b + M02 * c; /* output r of the group uses coefficients Mr0, Mr1, Mr2 */ | |
37 | *pOutput++ = M10 * a + M11 * b + M12 * c; | |
38 | *pOutput++ = M20 * a + M21 * b + M22 * c; | |
39 | ||
40 | /* Loop-based vectorization. */ | |
41 | *pOutput2++ = K00 * d; | |
42 | } | |
43 | } | |
44 | ||
45 | int main (int argc, const char* argv[]) | |
46 | { /* Test driver: fills the inputs, runs foo once, and aborts if either output array differs from its expected values; returns 0 otherwise. */ | |
47 | int input[N], output[N], i; | |
48 | int check_results[N] = {1470, 395, 28271, 5958, 1655, 111653, 10446, 2915, 195035, 14934, 4175, 278417, 19422, 5435, 361799, 0}; /* expected output[]; foo writes 3 * (N / 3) = 15 entries, so the last one keeps its initial 0 */ | |
49 | int input2[N], output2[N]; | |
50 | int check_results2[N] = {0, 405, 810, 1215, 1620, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* expected output2[]: K00 * i for the N / 3 = 5 entries foo writes, the rest keep their initial 0 */ | |
51 | ||
775abe9d | 52 | check_vect (); /* declared outside this file, presumably in tree-vect.h; NOTE(review): looks like a runtime check that vector code can execute here - confirm */ |
53 | ||
a0515226 | 54 | for (i = 0; i < N; i++) |
55 | { | |
56 | input[i] = i%256; /* with N == 16 this is simply i */ | |
57 | input2[i] = i%256; | |
58 | output[i] = 0; /* zeroed so entries foo does not write compare equal to the trailing 0s in the expected arrays */ | |
59 | output2[i] = 0; | |
60 | if (input[i] > 200) /* never true for N == 16 (values are 0..15); NOTE(review): presumably kept to influence how this init loop is vectorized - confirm against the dg-final loop counts */ | |
61 | abort (); | |
62 | } | |
63 | ||
64 | foo (input, output, input2, output2); | |
65 | ||
66 | for (i = 0; i < N; i++) /* compare all N entries of both outputs, including the ones foo left untouched */ | |
67 | if (output[i] != check_results[i] || output2[i] != check_results2[i]) | |
68 | abort (); | |
69 | ||
70 | return 0; | |
71 | } | |
72 | ||
9ec6ad4e | 73 | /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_perm } } } */ |
44c01906 | 74 | /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */ |
75 | /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */ | |
880ed4be | 76 | /* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm && vect_load_lanes } } } } */ |
44c01906 | 77 | /* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ |
78 | /* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ |