]>
Commit | Line | Data |
---|---|---|
23525216 | 1 | /* { dg-do run } */ |
2 | ||
3 | #include <stdlib.h> | |
4 | ||
/* Tolerance that check () compares the error magnitude against.  */
#define EPS 0.0001
/* Element count of each test vector.  The expansion is parenthesized so
   that it behaves as a single value inside larger expressions such as
   x / N or x % N.  */
#define N (1024 * 1024)
7 | ||
/* Populate the first N entries of both input vectors:
   B[k] receives 0.1 * k and C[k] receives 0.01 * k * k.
   Nothing is written when N is zero or negative.  */
void init (float B[], float C[], int n)
{
  int k = 0;

  while (k < n)
    {
      B[k] = 0.1 * k;
      C[k] = 0.01 * k * k;
      k++;
    }
}
17 | ||
/* Sequential reference implementation: returns the single-precision
   sum of B[k] * C[k] for k = 0 .. N-1, accumulated in index order.
   Returns 0 when N is zero or negative.  */
float dotprod_ref (float B[], float C[], int n)
{
  float acc = 0.0;
  int k = 0;

  while (k < n)
    {
      acc += B[k] * C[k];
      k++;
    }

  return acc;
}
28 | ||
/* Dot product of B[0..n-1] and C[0..n-1] computed in an OpenMP target
   region.

   The index range is cut into blocks of BLOCK_SIZE elements.  The outer
   loop over blocks is a `distribute' loop inside a `teams' construct
   created with num_teams (NUM_TEAMS) and thread_limit (BLOCK_THREADS);
   each block is then summed by a `parallel for'.  Both levels use a `+'
   reduction on SUM, which is mapped tofrom so the result comes back to
   the caller.

   Returns 0 when N is not positive.  A non-positive BLOCK_SIZE is
   treated as one block covering the whole range.  */
float dotprod (float B[], float C[], int n, int block_size,
	       int num_teams, int block_threads)
{
  int i, i0;
  float sum = 0;

  /* Nothing to sum; also keeps a negative length out of the map
     clauses' array sections.  */
  if (n <= 0)
    return sum;

  /* With a step of zero (or a negative step) i0 would never reach n
     and the outer loop would not terminate.  */
  if (block_size <= 0)
    block_size = n;

#pragma omp target map(to: B[0:n], C[0:n]) map(tofrom: sum)
#pragma omp teams num_teams(num_teams) thread_limit(block_threads) \
  reduction(+:sum)
#pragma omp distribute
  for (i0 = 0; i0 < n; i0 += block_size)
    /* The upper bound is min (n, i0 + block_size).  It is tested as
       n - i0 < block_size so that the comparison itself cannot
       overflow int when the last block is partial.  */
#pragma omp parallel for reduction(+:sum)
    for (i = i0; i < ((n - i0 < block_size) ? n : i0 + block_size); i++)
      sum += B[i] * C[i];

  return sum;
}
46 | ||
47 | void check (float a, float b) | |
48 | { | |
49 | float err = (b == 0.0) ? a : (a - b) / b; | |
50 | if (((err > 0) ? err : -err) > EPS) | |
51 | abort (); | |
52 | } | |
53 | ||
54 | int main () | |
55 | { | |
56 | float *v1 = (float *) malloc (N * sizeof (float)); | |
57 | float *v2 = (float *) malloc (N * sizeof (float)); | |
58 | ||
59 | float p1, p2; | |
60 | ||
61 | init (v1, v2, N); | |
62 | ||
63 | p1 = dotprod_ref (v1, v2, N); | |
f3d688fb | 64 | p2 = dotprod (v1, v2, N, N / 8, 2, 8); |
23525216 | 65 | |
66 | check (p1, p2); | |
67 | ||
68 | free (v1); | |
69 | free (v2); | |
70 | ||
71 | return 0; | |
72 | } |