]>
Commit | Line | Data |
---|---|---|
1cc521f1 | 1 | /* { dg-do run } */ |
5869faff | 2 | /* { dg-options "-O3 -floop-unroll-and-jam -fno-tree-loop-im --param unroll-jam-min-percent=0 -fdump-tree-unrolljam-details" } */ |
76c7e7d6 | 3 | /* { dg-additional-options "--param max-completely-peel-times=16" { target { s390*-*-* } } } */ |
1cc521f1 MM |
4 | /* { dg-require-effective-target int32plus } */ |
5 | ||
6 | #include <stdio.h> | |
7 | extern unsigned int a[]; | |
8 | extern unsigned int b[]; | |
9 | extern unsigned int aa[][1024]; | |
10 | unsigned int checksum; | |
11 | void checkaa(void) /* Fold every element of aa[0..15][0..1023] into the global checksum. */ | |
12 | { | |
13 | unsigned sum = 1; | |
14 | unsigned long i, j; | |
15 | for (i = 0; i < 1024; i++) { /* i is the second (1024-wide) index of aa */ | |
16 | for (j = 0; j < 16; j++) { /* j is the first (16-wide) index of aa */ | |
17 | sum += aa[j][i]*31+47; /* unsigned arithmetic, so overflow wraps */ | |
18 | } | |
19 | } | |
20 | checksum = checksum * 27 + sum; /* chain this pass's sum onto the running checksum */ | |
21 | //printf(" %d\n", sum); | |
22 | } | |
23 | ||
24 | void checkb(void) /* Fold b[0..1023] into the global checksum. */ | |
25 | { | |
26 | unsigned sum = 1; | |
27 | unsigned long i, j; /* j is declared but never used in this function */ | |
28 | for (i = 0; i < 1024; i++) { | |
29 | sum += b[i]*31+47; /* unsigned arithmetic, so overflow wraps */ | |
30 | } | |
31 | checksum = checksum * 27 + sum; /* chain this pass's sum onto the running checksum */ | |
32 | //printf(" %d\n", sum); | |
33 | } | |
34 | ||
35 | #define TEST(name, body, test) /* Define two functions around the same two-deep loop nest: `name`, compiled with the file's options, and `name##noopt`, forced to O1 by its optimize attribute. */ \ | |
36 | static void __attribute__((noinline,noclone)) name (unsigned long n, unsigned long m) \ | |
37 | { \ | |
5869faff | 38 | unsigned i, j; /* plain unsigned here; the noopt twin below uses unsigned long */ \ |
1cc521f1 MM |
39 | for (i = 1; i < m; i++) { /* outer index runs 1..m-1 */ \ |
40 | for (j = 1; j < n; j++) { /* inner index runs 1..n-1 */ \ | |
41 | body; /* pasted verbatim; the instantiations in this file index arrays with i and j */ \ | |
42 | } \ | |
43 | } \ | |
44 | test; /* executed once, after the whole nest has finished */ \ | |
45 | } \ | |
46 | static void __attribute__((noinline,noclone,optimize("O1"))) name ## noopt (unsigned long n, unsigned long m) \ | |
47 | { \ | |
48 | unsigned long i, j; \ | |
49 | for (i = 1; i < m; i++) { \ | |
50 | for (j = 1; j < n; j++) { \ | |
51 | body; \ | |
52 | } \ | |
53 | } \ | |
54 | test; \ | |
55 | } | |
56 | /* NOTE(review): in the trailing notes, ok/notok presumably marks whether the nest is expected to be unroll-and-jammed, and the number pair looks like a dependence distance -- confirm the sign convention before relying on it. */ TEST(foo1, aa[i+1][j+1]=aa[i][j] * aa[i][j] / 2, checkaa()) //ok, -1,-1 | |
57 | TEST(foo2, aa[i][j+1]=3*aa[i+1][j], checkaa()) //notok, 1,-1 | |
58 | TEST(foo3, aa[i+1][j-1]=aa[i][j] * aa[i][j] / 2, checkaa()) //notok, -1,1 | |
59 | TEST(foo4, aa[i][j] = aa[i-1][j+1] * aa[i-1][j+1] / 2, checkaa()) //notok, -1,1 | |
60 | TEST(foo5, aa[i][j] = aa[i+1][j+1] * aa[i+1][j+1] / 2, checkaa()) //ok, 1,1 | |
61 | TEST(foo6, aa[i][j] = aa[i+1][j] * aa[i+1][j] / 2, checkaa()) //ok, -1,0 | |
92781ff1 MM |
62 | TEST(foo61, aa[i][0] = aa[i+1][0] * aa[i+1][0] / 2, checkaa()) //notok, -1,0 |
63 | TEST(foo62, aa[i][j/2] = aa[i+1][j/2] * aa[i+1][j/2] / 2, checkaa()) //notok, not affine | |
64 | TEST(foo63, aa[i][j%2] = aa[i+1][j%2] * aa[i+1][j%2] / 2, checkaa()) //notok, not affine | |
1cc521f1 MM |
65 | TEST(foo7, aa[i+1][j] = aa[i][j] * aa[i][j] / 2, checkaa()) //ok, 1,0 |
66 | TEST(foo9, b[j] = 3*b[j+1] + 1, checkb()) //notok, 0,-1 | |
67 | TEST(foo10, b[j] = 3*b[j] + 1, checkb()) //ok, 0,0 | |
92781ff1 MM |
68 | extern int f; |
69 | TEST(foo11, f = b[i-1] = 1 + 3* b[i+1], checkb()) //ok, 2,0 but must reduce unroll factor to 2, (it would be incorrect with unroll-by-3, which the profitability would suggest) | |
1cc521f1 MM |
70 | |
71 | /* foo8 should work as well, but currently doesn't because the distance | |
72 | vectors we compute are too pessimistic. We compute | |
73 | (0,1), (1,1) and (1,-1) | |
74 | and the last one causes us to lose. */ | |
75 | TEST(foo8, b[j+1] = 3*b[j] + 1, checkb()) //ok, 0,1 | |
76 | ||
92781ff1 | 77 | int f; |
1cc521f1 MM |
78 | unsigned int a[1024]; |
79 | unsigned int b[1024]; | |
80 | unsigned int aa[16][1024]; | |
81 | void init(void) /* Reset a, b, aa and checksum to the same fixed starting state on every call. */ | |
82 | { | |
83 | unsigned long i,j; | |
84 | for (i = 0; i < 1024; i++) { | |
85 | for (j = 0; j < 16; j++) { | |
86 | aa[j][i] = ((j+1)*2+i+1) % 17; /* values fall in 0..16 */ | |
87 | } | |
88 | a[i] = ((i+1)*31) % 19; /* values fall in 0..18 */ | |
89 | b[i] = ((i+1)*47) % 23; /* values fall in 0..22 */ | |
90 | } | |
91 | checksum = 1; /* checkaa/checkb chain onto this seed */ | |
92 | } | |
93 | ||
94 | #define RUN(name) /* Run name##noopt and then name, four calls each from a fresh init(), and compare the two final checksums. Expands to several statements and uses i, checka and fail from the enclosing scope. */ \ | |
95 | printf(" %s\n", #name); \ | |
96 | init();for(i=0;i<4;i++)name##noopt(32,8); checka = checksum; /* reference checksum from the O1 copy */ \ | |
97 | init();for(i=0;i<4;i++)name(32,8); /* same calls against the copy built with the file's options */ \ | |
92781ff1 | 98 | if (checka != checksum) fail = 1; /* record the mismatch; later RUNs still execute */ \ |
1cc521f1 MM |
99 | printf("%sok %s\n", checka != checksum ? "NOT " : "", #name); |
100 | ||
101 | int main() /* Run every foo/foo-noopt pair and abort if any pair produced different checksums. */ | |
102 | { | |
92781ff1 | 103 | int fail = 0; /* set to 1 by RUN on a checksum mismatch */ |
1cc521f1 MM |
104 | int i; /* loop counter used inside RUN */ |
105 | unsigned checka; /* reference checksum stored by RUN */ | |
106 | RUN(foo1); | |
107 | RUN(foo2); | |
108 | RUN(foo3); | |
109 | RUN(foo4); | |
110 | RUN(foo5); | |
111 | RUN(foo6); | |
92781ff1 MM |
112 | RUN(foo61); |
113 | RUN(foo62); | |
114 | RUN(foo63); | |
1cc521f1 MM |
115 | RUN(foo7); |
116 | RUN(foo8); | |
117 | RUN(foo9); | |
118 | RUN(foo10); | |
92781ff1 MM |
119 | RUN(foo11); |
120 | if (fail) /* checked only after all pairs have run, so every result line is printed first */ | |
121 | __builtin_abort(); | |
122 | return fail; | |
1cc521f1 MM |
123 | } |
124 | ||
92781ff1 MM |
125 | /* Six loops should be unroll-jammed (actually seven, but foo8 is currently missed; see the comment above its definition). */ |
126 | /* { dg-final { scan-tree-dump-times "applying unroll and jam" 6 "unrolljam" } } */ |