]>
Commit | Line | Data |
---|---|---|
6e0df3b7 TS |
1 | /* This code uses nvptx inline assembly guarded with acc_on_device, which is |
2 | not optimized away at -O0, and then confuses the target assembler. | |
3 | { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ | |
e3091406 NS |
4 | |
5 | #include <stdio.h> | |
6 | ||
7 | #define N (32*32*32+17) | |
8 | int main () | |
9 | { | |
10 | int ary[N]; | |
11 | int ix; | |
12 | int exit = 0; | |
13 | int ondev = 0; | |
14 | ||
15 | for (ix = 0; ix < N;ix++) | |
16 | ary[ix] = -1; | |
17 | ||
7d6206fe | 18 | #pragma acc parallel num_gangs(32) copy(ary) copy(ondev) |
e3091406 NS |
19 | { |
20 | #pragma acc loop gang | |
21 | for (unsigned ix = 0; ix < N; ix++) | |
22 | { | |
23 | if (__builtin_acc_on_device (5)) | |
24 | { | |
25 | int g = 0, w = 0, v = 0; | |
26 | ||
27 | __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); | |
28 | __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); | |
29 | __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); | |
30 | ary[ix] = (g << 16) | (w << 8) | v; | |
31 | ondev = 1; | |
32 | } | |
33 | else | |
34 | ary[ix] = ix; | |
35 | } | |
36 | } | |
37 | ||
38 | for (ix = 0; ix < N; ix++) | |
39 | { | |
40 | int expected = ix; | |
41 | if(ondev) | |
42 | { | |
43 | int g = ix / ((N + 31) / 32); | |
44 | int w = 0; | |
45 | int v = 0; | |
46 | ||
47 | expected = (g << 16) | (w << 8) | v; | |
48 | } | |
49 | ||
50 | if (ary[ix] != expected) | |
51 | { | |
52 | exit = 1; | |
53 | printf ("ary[%d]=%x expected %x\n", ix, ary[ix], expected); | |
54 | } | |
55 | } | |
56 | ||
57 | return exit; | |
58 | } |