From: Thomas Schwinge
Date: Fri, 21 Jan 2022 11:48:28 +0000 (+0100)
Subject: Strengthen a few OpenACC test cases
X-Git-Tag: basepoints/gcc-13~1512
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=087e545747ca9ee977e84326877b0ce1bc4c383a;p=thirdparty%2Fgcc.git

Strengthen a few OpenACC test cases

Rather than rubber-stamp whatever requested vs. actual device kernel launch
configuration happens, actually (again) verify the requested values (modulo
expected variations).

This better highlights that "AMD GCN has an upper limit of
'num_workers(16)'", and the deficiency that "AMD GCN uses the autovectorizer
for the vector dimension: the use of a function call in vector-partitioned
code [...] is not currently supported".

And, this removes several instances of race conditions, where variables are
concurrently written to in OpenACC gang-redundant mode.

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c: Strengthen.
	* testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-v-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-w-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-v-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-w-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c: Likewise.
--- diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c index e5ed2ab70062..d3f6ea24e7e3 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c @@ -19,9 +19,12 @@ int main () for (ix = 0; ix < N;ix++) ary[ix] = -1; - -#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ - copy(ary) copy(ondev) copyout(gangsize, workersize, vectorsize) + +#define NG 32 +#define NW 32 +#define VL 32 +#pragma acc parallel num_gangs(NG) num_workers(NW) vector_length(VL) \ + copy(ary) copy(ondev) /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ { #pragma acc loop gang worker vector @@ -45,11 +48,19 @@ int main () else ary[ix] = ix; } - - gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG); - workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); - vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } + gangsize = NG; + workersize = NW; + vectorsize = VL; +#ifdef ACC_DEVICE_TYPE_radeon + /* AMD GCN has an upper limit of 'num_workers(16)'. */ + if (workersize > 16) + workersize = 16; + /* AMD GCN uses the autovectorizer for the vector dimension: the use + of a function call in vector-partitioned code in this test is not + currently supported. 
*/ + vectorsize = 1; +#endif for (ix = 0; ix < N; ix++) { diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c index e73ed6064eba..4b761f0f6240 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c @@ -46,14 +46,17 @@ int main () int ary[N]; int ix; int exit = 0; - int gangsize = 0, workersize = 0, vectorsize = 0; + int gangsize, workersize, vectorsize; int *gangdist, *workerdist, *vectordist; for (ix = 0; ix < N;ix++) ary[ix] = -1; -#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ - copy(ary) copyout(gangsize, workersize, vectorsize) +#define NG 32 +#define NW 32 +#define VL 32 +#pragma acc parallel num_gangs(NG) num_workers(NW) vector_length(VL) \ + copy(ary) /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ { #pragma acc loop gang worker vector @@ -71,11 +74,23 @@ int main () ary[ix] = (g << 16) | (w << 8) | v; } - - gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG); - workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); - vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } + gangsize = NG; + workersize = NW; + vectorsize = VL; +#if defined ACC_DEVICE_TYPE_host + gangsize = 1; + workersize = 1; + vectorsize = 1; +#elif defined ACC_DEVICE_TYPE_radeon + /* AMD GCN has an upper limit of 'num_workers(16)'. */ + if (workersize > 16) + workersize = 16; + /* AMD GCN uses the autovectorizer for the vector dimension: the use + of a function call in vector-partitioned code in this test is not + currently supported. 
*/ + vectorsize = 1; +#endif gangdist = (int *) __builtin_alloca (gangsize * sizeof (int)); workerdist = (int *) __builtin_alloca (workersize * sizeof (int)); @@ -92,6 +107,11 @@ int main () int w = (ary[ix] >> 8) & 255; int v = ary[ix] & 255; + if (g >= gangsize + || w >= workersize + || v >= vectorsize) + __builtin_abort (); + gangdist[g]++; workerdist[w]++; vectordist[v]++; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c index c1a2d0cffe11..4099d6072da9 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c @@ -16,8 +16,11 @@ int main () int t = 0, h = 0; int gangsize, workersize, vectorsize; -#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ - copy(ondev) copyout(gangsize, workersize, vectorsize) +#define NG 32 +#define NW 32 +#define VL 32 +#pragma acc parallel num_gangs(NG) num_workers(NW) vector_length(VL) \ + copy(ondev) /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ { #pragma acc loop gang worker vector reduction(+:t) @@ -42,10 +45,19 @@ int main () } t += val; } - gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG); - workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); - vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } + gangsize = NG; + workersize = NW; + vectorsize = VL; +#ifdef ACC_DEVICE_TYPE_radeon + /* AMD GCN has an upper limit of 'num_workers(16)'. */ + if (workersize > 16) + workersize = 16; + /* AMD GCN uses the autovectorizer for the vector dimension: the use + of a function call in vector-partitioned code in this test is not + currently supported. 
*/ + vectorsize = 1; +#endif for (ix = 0; ix < N; ix++) { diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c index 58c7b6ab57fa..0fe368623c31 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c @@ -17,7 +17,8 @@ int main () int t = 0, h = 0; int vectorsize; -#pragma acc parallel vector_length(32) copy(ondev) copyout(vectorsize) +#define VL 32 +#pragma acc parallel vector_length(VL) copy(ondev) /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ { #pragma acc loop vector reduction (+:t) @@ -42,8 +43,14 @@ int main () } t += val; } - vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } + vectorsize = VL; +#ifdef ACC_DEVICE_TYPE_radeon + /* AMD GCN uses the autovectorizer for the vector dimension: the use + of a function call in vector-partitioned code in this test is not + currently supported. 
*/ + vectorsize = 1; +#endif for (ix = 0; ix < N; ix++) { diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c index 85931f5e4334..0cf2d473eb81 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c @@ -17,7 +17,8 @@ int main () int q = 0, h = 0; int vectorsize; -#pragma acc parallel vector_length(32) copy(q) copy(ondev) copyout(vectorsize) +#define VL 32 +#pragma acc parallel vector_length(VL) copy(q) copy(ondev) /* { dg-note {variable 't' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ { @@ -46,8 +47,14 @@ int main () t += val; } q = t; - vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } + vectorsize = VL; +#ifdef ACC_DEVICE_TYPE_radeon + /* AMD GCN uses the autovectorizer for the vector dimension: the use + of a function call in vector-partitioned code in this test is not + currently supported. 
*/ + vectorsize = 1; +#endif for (ix = 0; ix < N; ix++) { diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c index b9ceec9887da..b0cb09399ba7 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c @@ -19,8 +19,10 @@ int main () int t = 0, h = 0; int workersize; -#pragma acc parallel num_workers(32) vector_length(32) copy(ondev) \ - copyout(workersize) +#define NW 32 +#define VL 32 +#pragma acc parallel num_workers(NW) vector_length(VL) \ + copy(ondev) /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-3 } */ { @@ -46,8 +48,13 @@ int main () } t += val; } - workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); } + workersize = NW; +#ifdef ACC_DEVICE_TYPE_radeon + /* AMD GCN has an upper limit of 'num_workers(16)'. 
*/ + if (workersize > 16) + workersize = 16; +#endif for (ix = 0; ix < N; ix++) { diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c index ff5e4a1656ba..f9baedb0c466 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c @@ -19,8 +19,10 @@ int main () int q = 0, h = 0; int workersize; -#pragma acc parallel num_workers(32) vector_length(32) copy(q) copy(ondev) \ - copyout(workersize) +#define NW 32 +#define VL 32 +#pragma acc parallel num_workers(NW) vector_length(VL) \ + copy(q) copy(ondev) /* { dg-note {variable 't' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-4 } */ @@ -50,8 +52,13 @@ int main () t += val; } q = t; - workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); } + workersize = NW; +#ifdef ACC_DEVICE_TYPE_radeon + /* AMD GCN has an upper limit of 'num_workers(16)'. 
*/ + if (workersize > 16) + workersize = 16; +#endif for (ix = 0; ix < N; ix++) { diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c index 5d60899acc10..fadb2627f73e 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c @@ -16,8 +16,10 @@ int main () int t = 0, h = 0; int workersize, vectorsize; -#pragma acc parallel num_workers(32) vector_length(32) copy(ondev) \ - copyout(workersize, vectorsize) +#define NW 32 +#define VL 32 +#pragma acc parallel num_workers(NW) vector_length(VL) \ + copy(ondev) /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ { #pragma acc loop worker vector reduction (+:t) @@ -42,9 +44,18 @@ int main () } t += val; } - workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); - vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } + workersize = NW; + vectorsize = VL; +#ifdef ACC_DEVICE_TYPE_radeon + /* AMD GCN has an upper limit of 'num_workers(16)'. */ + if (workersize > 16) + workersize = 16; + /* AMD GCN uses the autovectorizer for the vector dimension: the use + of a function call in vector-partitioned code in this test is not + currently supported. 
*/ + vectorsize = 1; +#endif for (ix = 0; ix < N; ix++) { diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c index 9ccc1a89b13c..bfd22b0db7fa 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c @@ -20,8 +20,9 @@ int main () for (ix = 0; ix < N;ix++) ary[ix] = -1; -#pragma acc parallel vector_length(32) copy(ary) copy(ondev) \ - copyout(vectorsize) +#define VL 32 +#pragma acc parallel vector_length(VL) \ + copy(ary) copy(ondev) /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ { #pragma acc loop vector @@ -44,8 +45,14 @@ int main () else ary[ix] = ix; } - vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } + vectorsize = VL; +#ifdef ACC_DEVICE_TYPE_radeon + /* AMD GCN uses the autovectorizer for the vector dimension: the use + of a function call in vector-partitioned code in this test is not + currently supported. 
*/ + vectorsize = 1; +#endif for (ix = 0; ix < N; ix++) { diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c index 0e99ec620385..b910e251fb11 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c @@ -23,8 +23,10 @@ int main () for (ix = 0; ix < N;ix++) ary[ix] = -1; -#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \ - copyout(workersize) +#define NW 32 +#define VL 32 +#pragma acc parallel num_workers(NW) vector_length(VL) \ + copy(ary) copy(ondev) /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "vector" { target *-*-* } .-3 } */ { @@ -48,8 +50,13 @@ int main () else ary[ix] = ix; } - workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); } + workersize = NW; +#ifdef ACC_DEVICE_TYPE_radeon + /* AMD GCN has an upper limit of 'num_workers(16)'. 
*/ + if (workersize > 16) + workersize = 16; +#endif for (ix = 0; ix < N; ix++) { diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c index f4707d153944..77326068a666 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c @@ -20,8 +20,10 @@ int main () for (ix = 0; ix < N;ix++) ary[ix] = -1; -#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \ - copyout(workersize, vectorsize) +#define NW 32 +#define VL 32 +#pragma acc parallel num_workers(NW) vector_length(VL) \ + copy(ary) copy(ondev) /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ { #pragma acc loop worker vector @@ -44,9 +46,18 @@ int main () else ary[ix] = ix; } - workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); - vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } + workersize = NW; + vectorsize = VL; +#ifdef ACC_DEVICE_TYPE_radeon + /* AMD GCN has an upper limit of 'num_workers(16)'. */ + if (workersize > 16) + workersize = 16; + /* AMD GCN uses the autovectorizer for the vector dimension: the use + of a function call in vector-partitioned code in this test is not + currently supported. 
*/ + vectorsize = 1; +#endif for (ix = 0; ix < N; ix++) { diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c index da13d84908a8..81e08119214a 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c @@ -35,14 +35,27 @@ int main () for (ix = 0; ix < N;ix++) ary[ix] = -1; -#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev) copyout(gangsize, workersize, vectorsize) +#define NG 32 +#define NW 32 +#define VL 32 +#pragma acc parallel num_gangs(NG) num_workers(NW) vector_length(VL) \ + copy(ary) copy(ondev) { ondev = acc_on_device (acc_device_not_host); gang (ary); - gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG); - workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); - vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } + gangsize = NG; + workersize = NW; + vectorsize = VL; +#ifdef ACC_DEVICE_TYPE_radeon + /* AMD GCN has an upper limit of 'num_workers(16)'. */ + if (workersize > 16) + workersize = 16; + /* AMD GCN uses the autovectorizer for the vector dimension: the use + of a function call in vector-partitioned code in this test is not + currently supported. 
*/ + vectorsize = 1; +#endif for (ix = 0; ix < N; ix++) { diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c index dd7bb6cdcd1e..7310906bd2d6 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c @@ -35,13 +35,20 @@ int main () for (ix = 0; ix < N;ix++) ary[ix] = -1; -#pragma acc parallel vector_length(32) copy(ary) copy(ondev) \ - copyout(vectorsize) +#define VL 32 +#pragma acc parallel vector_length(VL) \ + copy(ary) copy(ondev) { ondev = acc_on_device (acc_device_not_host); vector (ary); - vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } + vectorsize = VL; +#ifdef ACC_DEVICE_TYPE_radeon + /* AMD GCN uses the autovectorizer for the vector dimension: the use + of a function call in vector-partitioned code in this test is not + currently supported. */ + vectorsize = 1; +#endif for (ix = 0; ix < N; ix++) { diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c index b9137d819352..4521cb911437 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c @@ -39,13 +39,20 @@ int main () for (ix = 0; ix < N;ix++) ary[ix] = -1; -#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \ - copyout(workersize) +#define NW 32 +#define VL 32 +#pragma acc parallel num_workers(NW) vector_length(VL) \ + copy(ary) copy(ondev) { ondev = acc_on_device (acc_device_not_host); worker (ary); - workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); } + workersize = NW; +#ifdef ACC_DEVICE_TYPE_radeon + /* AMD GCN has an upper limit of 'num_workers(16)'. 
*/ + if (workersize > 16) + workersize = 16; +#endif for (ix = 0; ix < N; ix++) { diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c index 73696e4e59a3..647d075bb006 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c @@ -35,14 +35,25 @@ int main () for (ix = 0; ix < N;ix++) ary[ix] = -1; -#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \ - copyout(workersize, vectorsize) +#define NW 32 +#define VL 32 +#pragma acc parallel num_workers(NW) vector_length(VL) \ + copy(ary) copy(ondev) { ondev = acc_on_device (acc_device_not_host); worker (ary); - workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); - vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } + workersize = NW; + vectorsize = VL; +#ifdef ACC_DEVICE_TYPE_radeon + /* AMD GCN has an upper limit of 'num_workers(16)'. */ + if (workersize > 16) + workersize = 16; + /* AMD GCN uses the autovectorizer for the vector dimension: the use + of a function call in vector-partitioned code in this test is not + currently supported. */ + vectorsize = 1; +#endif for (ix = 0; ix < N; ix++) {