From: Julian Brown Date: Tue, 26 Feb 2019 21:18:36 +0000 (-0800) Subject: Adjustments and additions to testcases X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c40efbf10d4ce8be1f30715f586da648746fd868;p=thirdparty%2Fgcc.git Adjustments and additions to testcases Some additions of redundant "present" clauses dropped. 2018-10-22 Cesar Philippidis gcc/testsuite/ * g++.dg/goacc/loop-1.c: New test. * g++.dg/goacc/loop-2.c: New test. * g++.dg/goacc/loop-3.c: New test. 2018-10-22 James Norris Cesar Philippidis Tom de Vries libgomp/ * testsuite/libgomp.oacc-c-c++-common/data-2.c: Update parallel regions to denote variables copied in via acc enter data as present. * testsuite/libgomp.oacc-fortran/data-3.f90: Likewise. * testsuite/libgomp.oacc-fortran/data-4.f90: Likewise. * testsuite/libgomp.oacc-c-c++-common/subr.h: Reimplement. * testsuite/libgomp.oacc-c-c++-common/subr.ptx: Regenerated PTX. * testsuite/libgomp.oacc-c-c++-common/timer.h: Removed. * testsuite/libgomp.oacc-c-c++-common/lib-69.c: Change async checks. * testsuite/libgomp.oacc-c-c++-common/lib-70.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-71.c: Rework kernel i/f. * testsuite/libgomp.oacc-c-c++-common/lib-72.c: Rework kernel i/f and change async checks. * testsuite/libgomp.oacc-c-c++-common/lib-73.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-74.c: Rework kernel i/f and timing checks. * testsuite/libgomp.oacc-c-c++-common/lib-75.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-76.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-77.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-78.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-79.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-80.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-81.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-82.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-93.c: New test. 
(cherry picked from openacc-gcc-9-branch commit 18e5e71a03b69316246a417f5d957cbaa64245a5) --- diff --git a/gcc/testsuite/ChangeLog.omp b/gcc/testsuite/ChangeLog.omp index 9f9086f82e81..11be8f7ed887 100644 --- a/gcc/testsuite/ChangeLog.omp +++ b/gcc/testsuite/ChangeLog.omp @@ -1,3 +1,9 @@ +2018-10-22 Cesar Philippidis + + * g++.dg/goacc/loop-1.c: New test. + * g++.dg/goacc/loop-2.c: New test. + * g++.dg/goacc/loop-3.c: New test. + 2018-12-13 Cesar Philippidis Nathan Sidwell Julian Brown diff --git a/gcc/testsuite/g++.dg/goacc/loop-1.c b/gcc/testsuite/g++.dg/goacc/loop-1.c new file mode 100644 index 000000000000..51b20b0e2da0 --- /dev/null +++ b/gcc/testsuite/g++.dg/goacc/loop-1.c @@ -0,0 +1,23 @@ +void +f (int i, float j, int k) +{ +#pragma acc parallel num_gangs (i) num_workers (i) vector_length (i) +#pragma acc loop gang + for (i = 0; i < 20; ++i) + ; + +#pragma acc parallel num_gangs (j) /* { dg-error "'num_gangs' expression must be integral" } */ +#pragma acc loop gang + for (i = 0; i < 20; ++i) + ; + +#pragma acc parallel num_workers (j) /* { dg-error "'num_workers' expression must be integral" } */ +#pragma acc loop gang + for (i = 0; i < 20; ++i) + ; + +#pragma acc parallel vector_length (j) /* { dg-error "'vector_length' expression must be integral" } */ +#pragma acc loop gang + for (i = 0; i < 20; ++i) + ; +} diff --git a/gcc/testsuite/g++.dg/goacc/loop-2.c b/gcc/testsuite/g++.dg/goacc/loop-2.c new file mode 100644 index 000000000000..ddfb4804353a --- /dev/null +++ b/gcc/testsuite/g++.dg/goacc/loop-2.c @@ -0,0 +1,70 @@ +void +f (int i, int j, int k) +{ +#pragma acc kernels +#pragma acc loop gang + for (i = 0; i < 20; ++i) + ; + +#pragma acc kernels +#pragma acc loop gang (num: 10) + for (i = 0; i < 20; ++i) + ; + +#pragma acc kernels +#pragma acc loop gang (static: 10) + for (i = 0; i < 20; ++i) + ; + +#pragma acc kernels +#pragma acc loop gang (static: 5, num: 10) + for (i = 0; i < 20; ++i) + ; + + +#pragma acc kernels +#pragma acc loop gang (static: 5, 
num: 10, *) /* { dg-error "duplicate operand to clause" } */ + for (i = 0; i < 20; ++i) + ; + +#pragma acc kernels +#pragma acc loop gang (static: 5, num: 10, static: *) /* { dg-error "duplicate 'num' argument" } */ + for (i = 0; i < 20; ++i) + ; + +#pragma acc kernels +#pragma acc loop worker (static: 234) /* { dg-error "expected 'num' before" } */ + for (i = 0; i < 20; ++i) + ; + +#pragma acc kernels +#pragma acc loop worker (num: 234) + for (i = 0; i < 20; ++i) + ; + +#pragma acc kernels +#pragma acc loop worker (num: 234, num: 12) /* { dg-error "duplicate operand to clause" } */ + for (i = 0; i < 20; ++i) + ; + +#pragma acc kernels +#pragma acc loop vector /* { dg-error "gang, worker and vector must occur in this order in a loop nest" } */ + for (i = 0; i < 20; ++i) +#pragma acc loop worker + for (j = 0; j < 25; ++j) + ; + +#pragma acc kernels +#pragma acc loop worker (length: 20) /* { dg-error "expected 'num' before 'length'" } */ + for (i = 0; i < 20; ++i) +#pragma acc loop vector (length: 10) + for (j = 0; j < 25; ++j) + ; + +#pragma acc kernels +#pragma acc loop worker + for (i = 0; i < 20; ++i) +#pragma acc loop vector + for (j = 0; j < 25; ++j) + ; +} diff --git a/gcc/testsuite/g++.dg/goacc/loop-3.c b/gcc/testsuite/g++.dg/goacc/loop-3.c new file mode 100644 index 000000000000..c43b4f33e62a --- /dev/null +++ b/gcc/testsuite/g++.dg/goacc/loop-3.c @@ -0,0 +1,43 @@ +void +f (int i, int j, int k) +{ +#pragma acc kernels num_gangs (10) /* { dg-error "'num_gangs' is not valid" } */ +#pragma acc loop gang + for (i = 0; i < 20; ++i) + ; + +#pragma acc kernels num_workers (10) /* { dg-error "'num_workers' is not valid" } */ +#pragma acc loop worker + for (i = 0; i < 20; ++i) + ; + +#pragma acc kernels vector_length (10) /* { dg-error "'vector_length' is not valid" } */ +#pragma acc loop vector + for (i = 0; i < 20; ++i) + ; + +#pragma acc parallel num_gangs (10) num_workers (20) vector_length (32) +#pragma acc loop gang + for (i = 0; i < 20; ++i) + ; + +#pragma acc 
parallel num_gangs (i) num_workers (j) vector_length (k) +#pragma acc loop gang + for (i = 0; i < 20; ++i) + ; + +#pragma acc parallel num_gangs (10, i) /* { dg-error "expected '\\)' before ',' token" } */ +#pragma acc loop gang + for (i = 0; i < 20; ++i) + ; + +#pragma acc parallel num_workers (10, i) /* { dg-error "expected '\\)' before ',' token" } */ +#pragma acc loop gang + for (i = 0; i < 20; ++i) + ; + +#pragma acc parallel vector_length (10, i) /* { dg-error "expected '\\)' before ',' token" } */ +#pragma acc loop gang + for (i = 0; i < 20; ++i) + ; +} diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp index f810768a4c1c..1ff3517e67cb 100644 --- a/libgomp/ChangeLog.omp +++ b/libgomp/ChangeLog.omp @@ -1,3 +1,33 @@ +2018-10-22 James Norris + Cesar Philippidis + Tom de Vries + + * testsuite/libgomp.oacc-c-c++-common/data-2.c: Update parallel + regions to denote variables copyied in via acc enter data as + present. + * testsuite/libgomp.oacc-fortran/data-3.f90: Likewise. + * testsuite/libgomp.oacc-fortran/data-4.f90: Likewise. + * testsuite/libgomp.oacc-c-c++-common/subr.h: Reimplement. + * testsuite/libgomp.oacc-c-c++-common/subr.ptx: Regenerated PTX. + * testsuite/libgomp.oacc-c-c++-common/timer.h: Removed. + * testsuite/libgomp.oacc-c-c++-common/lib-69.c: Change async checks. + * testsuite/libgomp.oacc-c-c++-common/lib-70.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-71.c: Rework kernel i/f. + * testsuite/libgomp.oacc-c-c++-common/lib-72.c: Rework kernel i/f and + change async checks. + * testsuite/libgomp.oacc-c-c++-common/lib-73.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-74.c: Rework kernel i/f and + timing checks. + * testsuite/libgomp.oacc-c-c++-common/lib-75.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-76.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-77.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-78.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-79.c: Likewise. 
+ * testsuite/libgomp.oacc-c-c++-common/lib-80.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-81.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-82.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-93.c: New test. + 2018-12-13 Cesar Philippidis Nathan Sidwell Julian Brown diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c index c10bebaab136..455ca5b45278 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c @@ -9,46 +9,14 @@ int main (int argc, char **argv) { - CUdevice dev; CUfunction delay; CUmodule module; CUresult r; CUstream stream; - unsigned long *a, *d_a, dticks; - int nbytes; - float dtime; - void *kargs[2]; - int clkrate; - int devnum, nprocs; acc_init (acc_device_nvidia); - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, devnum); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); - abort (); - } - - r = - cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } - - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } - - r = cuModuleLoad (&module, "subr.ptx"); + r = cuModuleLoad (&module, "./subr.ptx"); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuModuleLoad failed: %d\n", r); @@ -62,20 +30,6 @@ main (int argc, char **argv) abort (); } - nbytes = nprocs * sizeof (unsigned long); - - dtime = 200.0; - - dticks = (unsigned long) (dtime * clkrate); - - a = (unsigned long *) malloc (nbytes); - d_a = (unsigned long *) acc_malloc (nbytes); - - acc_map_data (a, d_a, nbytes); - - kargs[0] = (void *) &d_a; - kargs[1] = (void *) &dticks; - stream = (CUstream) acc_get_cuda_stream (0); if (stream 
!= NULL) abort (); @@ -90,7 +44,7 @@ main (int argc, char **argv) if (!acc_set_cuda_stream (0, stream)) abort (); - r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuLaunchKernel failed: %d\n", r); @@ -118,11 +72,6 @@ main (int argc, char **argv) abort (); } - acc_unmap_data (a); - - free (a); - acc_free (d_a); - acc_shutdown (acc_device_nvidia); exit (0); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c index 912b266ec390..ee06898af791 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c @@ -1,6 +1,7 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lcuda" } */ +#include #include #include #include @@ -10,47 +11,17 @@ int main (int argc, char **argv) { - CUdevice dev; CUfunction delay; CUmodule module; CUresult r; - const int N = 10; + const int N = 3; int i; CUstream streams[N]; - unsigned long *a, *d_a, dticks; - int nbytes; - float dtime; - void *kargs[2]; - int clkrate; - int devnum, nprocs; + struct timeval tv1, tv2; + time_t diff; acc_init (acc_device_nvidia); - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, devnum); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); - abort (); - } - - r = - cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } - - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } - r = cuModuleLoad (&module, "subr.ptx"); if (r != CUDA_SUCCESS) { @@ -65,20 +36,6 @@ main (int argc, char **argv) abort (); } - nbytes = 
nprocs * sizeof (unsigned long); - - dtime = 200.0; - - dticks = (unsigned long) (dtime * clkrate); - - a = (unsigned long *) malloc (nbytes); - d_a = (unsigned long *) acc_malloc (nbytes); - - acc_map_data (a, d_a, nbytes); - - kargs[0] = (void *) &d_a; - kargs[1] = (void *) &dticks; - for (i = 0; i < N; i++) { streams[i] = (CUstream) acc_get_cuda_stream (i); @@ -96,9 +53,29 @@ main (int argc, char **argv) abort (); } + gettimeofday (&tv1, NULL); + + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[0], NULL, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + r = cuCtxSynchronize (); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxLaunch failed: %d\n", r); + abort (); + } + + gettimeofday (&tv2, NULL); + + diff = tv2.tv_sec - tv1.tv_sec; + for (i = 0; i < N; i++) { - r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], NULL, 0); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuLaunchKernel failed: %d\n", r); @@ -112,7 +89,7 @@ main (int argc, char **argv) } } - sleep ((int) (dtime / 1000.0f) + 1); + sleep ((diff + 1) * N); for (i = 0; i < N; i++) { @@ -123,10 +100,6 @@ main (int argc, char **argv) } } - acc_unmap_data (a); - - free (a); - acc_free (d_a); acc_shutdown (acc_device_nvidia); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c index e383ba04d699..920ff5f27ffb 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c @@ -10,45 +10,13 @@ int main (int argc, char **argv) { - CUdevice dev; CUfunction delay; CUmodule module; CUresult r; CUstream stream; - unsigned long *a, *d_a, dticks; - int nbytes; - float dtime; - void *kargs[2]; - int clkrate; - int devnum, nprocs; acc_init (acc_device_nvidia); - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, 
devnum); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); - abort (); - } - - r = - cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } - - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } - r = cuModuleLoad (&module, "subr.ptx"); if (r != CUDA_SUCCESS) { @@ -63,20 +31,6 @@ main (int argc, char **argv) abort (); } - nbytes = nprocs * sizeof (unsigned long); - - dtime = 200.0; - - dticks = (unsigned long) (dtime * clkrate); - - a = (unsigned long *) malloc (nbytes); - d_a = (unsigned long *) acc_malloc (nbytes); - - acc_map_data (a, d_a, nbytes); - - kargs[0] = (void *) &d_a; - kargs[1] = (void *) &dticks; - r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); if (r != CUDA_SUCCESS) { @@ -87,7 +41,7 @@ main (int argc, char **argv) if (!acc_set_cuda_stream (0, stream)) abort (); - r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuLaunchKernel failed: %d\n", r); @@ -100,7 +54,12 @@ main (int argc, char **argv) abort (); } - sleep ((int) (dtime / 1000.f) + 1); + r = cuCtxSynchronize (); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxSynchronize () failed: %d\n", r); + abort (); + } if (acc_async_test_all () != 1) { @@ -108,11 +67,6 @@ main (int argc, char **argv) abort (); } - acc_unmap_data (a); - - free (a); - acc_free (d_a); - acc_shutdown (acc_device_nvidia); exit (0); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c index 43a8b7e63957..4fa9d5a6c289 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c @@ 
-1,6 +1,7 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lcuda" } */ +#include #include #include #include @@ -10,47 +11,15 @@ int main (int argc, char **argv) { - CUdevice dev; CUfunction delay; CUmodule module; CUresult r; - const int N = 10; + const int N = 6; int i; CUstream streams[N]; - unsigned long *a, *d_a, dticks; - int nbytes; - float dtime; - void *kargs[2]; - int clkrate; - int devnum, nprocs; acc_init (acc_device_nvidia); - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, devnum); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); - abort (); - } - - r = - cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } - - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } - r = cuModuleLoad (&module, "subr.ptx"); if (r != CUDA_SUCCESS) { @@ -65,20 +34,6 @@ main (int argc, char **argv) abort (); } - nbytes = nprocs * sizeof (unsigned long); - - dtime = 200.0; - - dticks = (unsigned long) (dtime * clkrate); - - a = (unsigned long *) malloc (nbytes); - d_a = (unsigned long *) acc_malloc (nbytes); - - acc_map_data (a, d_a, nbytes); - - kargs[0] = (void *) &d_a; - kargs[1] = (void *) &dticks; - for (i = 0; i < N; i++) { streams[i] = (CUstream) acc_get_cuda_stream (i); @@ -98,13 +53,12 @@ main (int argc, char **argv) for (i = 0; i < N; i++) { - r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], NULL, 0); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuLaunchKernel failed: %d\n", r); abort (); } - } if (acc_async_test_all () != 0) @@ -113,7 +67,12 @@ main (int argc, char **argv) abort (); } - sleep ((int) (dtime / 1000.0f) + 1); + r = 
cuCtxSynchronize (); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxSynchronize failed: %d\n", r); + abort (); + } if (acc_async_test_all () != 1) { @@ -121,11 +80,6 @@ main (int argc, char **argv) abort (); } - acc_unmap_data (a); - - free (a); - acc_free (d_a); - acc_shutdown (acc_device_nvidia); exit (0); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c index 0efcf0d52229..e25d8944622e 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c @@ -5,77 +5,53 @@ #include #include #include -#include "timer.h" +#include int main (int argc, char **argv) { - CUdevice dev; CUfunction delay; CUmodule module; CUresult r; CUstream stream; - unsigned long *a, *d_a, dticks; - int nbytes; - float atime, dtime; - void *kargs[2]; - int clkrate; - int devnum, nprocs; + struct timeval tv1, tv2; + time_t t1, t2; acc_init (acc_device_nvidia); - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, devnum); + r = cuModuleLoad (&module, "subr.ptx"); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); + fprintf (stderr, "cuModuleLoad failed: %d\n", r); abort (); } - r = - cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); + r = cuModuleGetFunction (&delay, module, "delay"); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); abort (); } - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } + gettimeofday (&tv1, NULL); - r = cuModuleLoad (&module, "subr.ptx"); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, NULL, 0); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuModuleLoad failed: %d\n", r); + fprintf (stderr, "cuLaunchKernel failed: %d\n", 
r); abort (); } - r = cuModuleGetFunction (&delay, module, "delay"); + r = cuCtxSynchronize (); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + fprintf (stderr, "cuCtxSynchronize failed: %d\n", r); abort (); } - nbytes = nprocs * sizeof (unsigned long); - - dtime = 200.0; - - dticks = (unsigned long) (dtime * clkrate); - - a = (unsigned long *) malloc (nbytes); - d_a = (unsigned long *) acc_malloc (nbytes); - - acc_map_data (a, d_a, nbytes); + gettimeofday (&tv2, NULL); - kargs[0] = (void *) &d_a; - kargs[1] = (void *) &dticks; + t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); stream = (CUstream) acc_get_cuda_stream (0); if (stream != NULL) @@ -91,11 +67,9 @@ main (int argc, char **argv) if (!acc_set_cuda_stream (0, stream)) abort (); - init_timers (1); + gettimeofday (&tv1, NULL); - start_timer (0); - - r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuLaunchKernel failed: %d\n", r); @@ -103,38 +77,31 @@ main (int argc, char **argv) } acc_wait (0); - /* Test unseen async-argument. */ - acc_wait (1); - atime = stop_timer (0); + gettimeofday (&tv2, NULL); + + t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); - if (atime < dtime) + if (((abs (t2 - t1) / t1) * 100.0) > 1.0) { - fprintf (stderr, "actual time < delay time\n"); + fprintf (stderr, "too long 1\n"); abort (); } - start_timer (0); + gettimeofday (&tv1, NULL); acc_wait (0); - /* Test unseen async-argument. 
*/ - acc_wait (1); - atime = stop_timer (0); + gettimeofday (&tv2, NULL); + + t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); - if (0.010 < atime) + if (t2 > 1000) { - fprintf (stderr, "actual time too long\n"); + fprintf (stderr, "too long 2\n"); abort (); } - acc_unmap_data (a); - - fini_timers (); - - free (a); - acc_free (d_a); - acc_shutdown (acc_device_nvidia); exit (0); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c index 194221187482..53e285f38399 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c @@ -6,78 +6,55 @@ #include #include #include -#include "timer.h" +#include int main (int argc, char **argv) { - CUdevice dev; CUfunction delay; CUmodule module; CUresult r; - int N; + const int N = 2; int i; CUstream stream; - unsigned long *a, *d_a, dticks; - int nbytes; - float atime, dtime, hitime, lotime; - void *kargs[2]; - int clkrate; - int devnum, nprocs; + struct timeval tv1, tv2; + time_t t1, t2; acc_init (acc_device_nvidia); - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, devnum); + r = cuModuleLoad (&module, "subr.ptx"); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); + fprintf (stderr, "cuModuleLoad failed: %d\n", r); abort (); } - r = - cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); + r = cuModuleGetFunction (&delay, module, "delay"); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); abort (); } - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } + gettimeofday (&tv1, NULL); - r = cuModuleLoad (&module, "subr.ptx"); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, 
NULL, 0); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuModuleLoad failed: %d\n", r); + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); abort (); } - r = cuModuleGetFunction (&delay, module, "delay"); + r = cuCtxSynchronize (); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + fprintf (stderr, "cuCtxSynchronize failed: %d\n", r); abort (); } - nbytes = nprocs * sizeof (unsigned long); - - dtime = 200.0; - - dticks = (unsigned long) (dtime * clkrate); - - N = nprocs; - - a = (unsigned long *) malloc (nbytes); - d_a = (unsigned long *) acc_malloc (nbytes); + gettimeofday (&tv2, NULL); - acc_map_data (a, d_a, nbytes); + t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); stream = (CUstream) acc_get_cuda_stream (0); if (stream != NULL) @@ -93,16 +70,11 @@ main (int argc, char **argv) if (!acc_set_cuda_stream (0, stream)) abort (); - init_timers (1); - - kargs[0] = (void *) &d_a; - kargs[1] = (void *) &dticks; - - start_timer (0); + gettimeofday (&tv1, NULL); for (i = 0; i < N; i++) { - r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuLaunchKernel failed: %d\n", r); @@ -112,27 +84,18 @@ main (int argc, char **argv) acc_wait (0); } - atime = stop_timer (0); + gettimeofday (&tv2, NULL); - hitime = dtime * N; - hitime += hitime * 0.02; + t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); - lotime = dtime * N; - lotime -= lotime * 0.02; + t1 *= N; - if (atime > hitime || atime < lotime) + if (((abs (t2 - t1) / t1) * 100.0) > 1.0) { - fprintf (stderr, "actual time < delay time\n"); + fprintf (stderr, "too long\n"); abort (); } - acc_unmap_data (a); - - fini_timers (); - - free (a); - acc_free (d_a); - acc_shutdown (acc_device_nvidia); exit (0); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c index 11d9d621f87f..787dcb886ef7 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c @@ -6,78 +6,55 @@ #include #include #include -#include "timer.h" +#include int main (int argc, char **argv) { - CUdevice dev; CUfunction delay; CUmodule module; CUresult r; - int N; + const int N = 2; int i; CUstream *streams; - unsigned long *a, *d_a, dticks; - int nbytes; - float atime, dtime, hitime, lotime; - void *kargs[2]; - int clkrate; - int devnum, nprocs; + struct timeval tv1, tv2; + time_t t1, t2; acc_init (acc_device_nvidia); - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, devnum); + r = cuModuleLoad (&module, "subr.ptx"); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); + fprintf (stderr, "cuModuleLoad failed: %d\n", r); abort (); } - r = - cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); + r = cuModuleGetFunction (&delay, module, "delay"); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); abort (); } - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } + gettimeofday (&tv1, NULL); - r = cuModuleLoad (&module, "subr.ptx"); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, NULL, 0); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuModuleLoad failed: %d\n", r); + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); abort (); } - r = cuModuleGetFunction (&delay, module, "delay"); + r = cuCtxSynchronize (); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + fprintf (stderr, "cuCtxSynchronize failed: %d\n", r); abort (); } - nbytes = nprocs * sizeof (unsigned long); - - dtime = 200.0; - - dticks = 
(unsigned long) (dtime * clkrate); - - N = nprocs; + gettimeofday (&tv2, NULL); - a = (unsigned long *) malloc (nbytes); - d_a = (unsigned long *) acc_malloc (nbytes); - - acc_map_data (a, d_a, nbytes); + t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); streams = (CUstream *) malloc (N * sizeof (void *)); @@ -98,16 +75,11 @@ main (int argc, char **argv) abort (); } - init_timers (1); - - kargs[0] = (void *) &d_a; - kargs[1] = (void *) &dticks; - - start_timer (0); + gettimeofday (&tv1, NULL); for (i = 0; i < N; i++) { - r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], NULL, 0); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuLaunchKernel failed: %d\n", r); @@ -117,27 +89,19 @@ main (int argc, char **argv) acc_wait (i); } - atime = stop_timer (0); + gettimeofday (&tv2, NULL); - hitime = dtime * N; - hitime += hitime * 0.02; + t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); - lotime = dtime * N; - lotime -= lotime * 0.02; + t1 *= N; - if (atime > hitime || atime < lotime) + if (((abs (t2 - t1) / t1) * 100.0) > 1.0) { - fprintf (stderr, "actual time < delay time\n"); + fprintf (stderr, "too long\n"); abort (); } - acc_unmap_data (a); - - fini_timers (); - free (streams); - free (a); - acc_free (d_a); acc_shutdown (acc_device_nvidia); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c index 4f58fb23cfe6..0bed15fa7b35 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c @@ -6,77 +6,53 @@ #include #include #include -#include "timer.h" +#include int main (int argc, char **argv) { - CUdevice dev; CUfunction delay; CUmodule module; CUresult r; CUstream stream; - unsigned long *a, *d_a, dticks; - int nbytes; - float atime, dtime; - void *kargs[2]; - int clkrate; - int devnum, nprocs; + struct timeval tv1, 
tv2; + time_t t1, t2; acc_init (acc_device_nvidia); - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, devnum); + r = cuModuleLoad (&module, "subr.ptx"); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); + fprintf (stderr, "cuModuleLoad failed: %d\n", r); abort (); } - r = - cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); + r = cuModuleGetFunction (&delay, module, "delay"); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); abort (); } - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } + gettimeofday (&tv1, NULL); - r = cuModuleLoad (&module, "subr.ptx"); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, NULL, 0); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuModuleLoad failed: %d\n", r); + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); abort (); } - r = cuModuleGetFunction (&delay, module, "delay"); + r = cuCtxSynchronize (); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + fprintf (stderr, "cuCtxSynchronize failed: %d\n", r); abort (); } - nbytes = nprocs * sizeof (unsigned long); - - dtime = 200.0; - - dticks = (unsigned long) (dtime * clkrate); - - a = (unsigned long *) malloc (nbytes); - d_a = (unsigned long *) acc_malloc (nbytes); - - acc_map_data (a, d_a, nbytes); + gettimeofday (&tv2, NULL); - kargs[0] = (void *) &d_a; - kargs[1] = (void *) &dticks; + t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); stream = (CUstream) acc_get_cuda_stream (0); if (stream != NULL) @@ -92,11 +68,9 @@ main (int argc, char **argv) if (!acc_set_cuda_stream (0, stream)) abort (); - init_timers (1); + gettimeofday (&tv1, NULL); - start_timer (0); - - r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, 
kargs, 0); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuLaunchKernel failed: %d\n", r); @@ -105,33 +79,30 @@ main (int argc, char **argv) acc_wait_all (); - atime = stop_timer (0); + gettimeofday (&tv2, NULL); + + t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); - if (atime < dtime) + if (t2 > (t1 + (t1 * 0.10))) { - fprintf (stderr, "actual time < delay time\n"); + fprintf (stderr, "too long 1\n"); abort (); } - start_timer (0); + gettimeofday (&tv1, NULL); acc_wait_all (); - atime = stop_timer (0); + gettimeofday (&tv2, NULL); + + t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); - if (0.010 < atime) + if (t2 > 1000) { - fprintf (stderr, "actual time too long\n"); + fprintf (stderr, "too long 2\n"); abort (); } - acc_unmap_data (a); - - fini_timers (); - - free (a); - acc_free (d_a); - acc_shutdown (acc_device_nvidia); exit (0); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c index b2e2687e4e76..16eb446acd4c 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c @@ -6,80 +6,55 @@ #include #include #include -#include "timer.h" +#include int main (int argc, char **argv) { - CUdevice dev; CUfunction delay; CUmodule module; CUresult r; - int N; + const int N = 2; int i; CUstream stream; - unsigned long *a, *d_a, dticks; - int nbytes; - float atime, dtime, hitime, lotime; - void *kargs[2]; - int clkrate; - int devnum, nprocs; - - devnum = 2; + struct timeval tv1, tv2; + time_t t1, t2; acc_init (acc_device_nvidia); - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, devnum); + r = cuModuleLoad (&module, "subr.ptx"); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); + fprintf (stderr, "cuModuleLoad failed: %d\n", r); abort (); } - r = - cuDeviceGetAttribute 
(&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); + r = cuModuleGetFunction (&delay, module, "delay"); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); abort (); } - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } + gettimeofday (&tv1, NULL); - r = cuModuleLoad (&module, "subr.ptx"); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, NULL, 0); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuModuleLoad failed: %d\n", r); + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); abort (); } - r = cuModuleGetFunction (&delay, module, "delay"); + r = cuCtxSynchronize (); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + fprintf (stderr, "cuCtxSynchronize failed: %d\n", r); abort (); } - nbytes = nprocs * sizeof (unsigned long); - - dtime = 200.0; - - dticks = (unsigned long) (dtime * clkrate); - - N = nprocs; - - a = (unsigned long *) malloc (nbytes); - d_a = (unsigned long *) acc_malloc (nbytes); + gettimeofday (&tv2, NULL); - acc_map_data (a, d_a, nbytes); + t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); if (r != CUDA_SUCCESS) @@ -105,16 +80,11 @@ main (int argc, char **argv) if (!acc_set_cuda_stream (0, stream)) abort (); - init_timers (1); - - kargs[0] = (void *) &d_a; - kargs[1] = (void *) &dticks; - - start_timer (0); + gettimeofday (&tv1, NULL); for (i = 0; i < N; i++) { - r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuLaunchKernel failed: %d\n", r); @@ -156,7 +126,7 @@ main (int argc, char **argv) acc_wait (1); - atime = stop_timer (0); + gettimeofday (&tv2, NULL); if (acc_async_test (0) 
!= 1) abort (); @@ -164,25 +134,16 @@ main (int argc, char **argv) if (acc_async_test (1) != 1) abort (); - hitime = dtime * N; - hitime += hitime * 0.02; + t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); - lotime = dtime * N; - lotime -= lotime * 0.02; + t1 *= N; - if (atime > hitime || atime < lotime) + if ((((t2 > t1 ? t2 - t1 : t1 - t2) * 100.0) / t1) > 1.0) /* percent deviation of t2 from N * t1, divided in floating point; the former abs () / t1 form divided integers first, so any deviation smaller than t1 truncated to 0, and abs () narrowed time_t to int */ { - fprintf (stderr, "actual time < delay time\n"); + fprintf (stderr, "too long\n"); abort (); } - acc_unmap_data (a); - - fini_timers (); - - free (a); - acc_free (d_a); - acc_shutdown (acc_device_nvidia); exit (0); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c index d5f18f00319c..77de9ba29044 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c @@ -6,78 +6,55 @@ #include #include #include -#include "timer.h" +#include int main (int argc, char **argv) { - CUdevice dev; CUfunction delay; CUmodule module; CUresult r; - int N; + const int N = 2; int i; CUstream *streams, stream; - unsigned long *a, *d_a, dticks; - int nbytes; - float atime, dtime; - void *kargs[2]; - int clkrate; - int devnum, nprocs; + struct timeval tv1, tv2; + time_t t1, t2; acc_init (acc_device_nvidia); - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, devnum); + r = cuModuleLoad (&module, "subr.ptx"); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); + fprintf (stderr, "cuModuleLoad failed: %d\n", r); abort (); } - r = - cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); + r = cuModuleGetFunction (&delay, module, "delay"); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); abort (); } - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, 
"cuDeviceGetAttribute failed: %d\n", r); - abort (); - } + gettimeofday (&tv1, NULL); - r = cuModuleLoad (&module, "subr.ptx"); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, NULL, 0); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuModuleLoad failed: %d\n", r); - abort (); + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); } - r = cuModuleGetFunction (&delay, module, "delay"); + r = cuCtxSynchronize (); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); - abort (); + fprintf (stderr, "cuCtxSynchronize failed: %d\n", r); + abort (); } - nbytes = nprocs * sizeof (unsigned long); - - dtime = 500.0; - - dticks = (unsigned long) (dtime * clkrate); + gettimeofday (&tv2, NULL); - N = nprocs; - - a = (unsigned long *) malloc (nbytes); - d_a = (unsigned long *) acc_malloc (nbytes); - - acc_map_data (a, d_a, nbytes); + t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); streams = (CUstream *) malloc (N * sizeof (void *)); @@ -98,11 +75,6 @@ main (int argc, char **argv) abort (); } - init_timers (1); - - kargs[0] = (void *) &d_a; - kargs[1] = (void *) &dticks; - stream = (CUstream) acc_get_cuda_stream (N); if (stream != NULL) abort (); @@ -117,11 +89,11 @@ main (int argc, char **argv) if (!acc_set_cuda_stream (N, stream)) abort (); - start_timer (0); + gettimeofday (&tv1, NULL); for (i = 0; i < N; i++) { - r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], NULL, 0); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuLaunchKernel failed: %d\n", r); @@ -129,6 +101,10 @@ main (int argc, char **argv) } } + gettimeofday (&tv2, NULL); + + t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); + acc_wait_all_async (N); for (i = 0; i <= N; i++) @@ -145,15 +121,13 @@ main (int argc, char **argv) abort (); } - atime = stop_timer (0); - - if (atime < dtime) + if ((t1 * N) < t2) { - fprintf (stderr, "actual time < 
delay time\n"); + fprintf (stderr, "too long 1\n"); abort (); } - start_timer (0); + gettimeofday (&tv1, NULL); stream = (CUstream) acc_get_cuda_stream (N + 1); if (stream != NULL) @@ -173,35 +147,33 @@ main (int argc, char **argv) acc_wait (N + 1); - atime = stop_timer (0); + gettimeofday (&tv2, NULL); + + t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); - if (0.10 < atime) + if (t1 > 1000) { - fprintf (stderr, "actual time too long\n"); + fprintf (stderr, "too long 2\n"); abort (); } - start_timer (0); + gettimeofday (&tv1, NULL); acc_wait_all_async (N); acc_wait (N); - atime = stop_timer (0); + gettimeofday (&tv2, NULL); - if (0.10 < atime) + t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); + + if (t1 > 1000) { - fprintf (stderr, "actual time too long\n"); + fprintf (stderr, "too long 3\n"); abort (); } - acc_unmap_data (a); - - fini_timers (); - free (streams); - free (a); - acc_free (d_a); acc_shutdown (acc_device_nvidia); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c index be30a7f28ac0..ecf7488a0b90 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c @@ -10,46 +10,18 @@ int main (int argc, char **argv) { - CUdevice dev; CUfunction delay2; CUmodule module; CUresult r; - int N; + const int N = 32; int i; CUstream *streams; - unsigned long **a, **d_a, *tid, ticks; + unsigned long **a, **d_a, *tid; int nbytes; - void *kargs[3]; - int clkrate; - int devnum, nprocs; + void *kargs[2]; acc_init (acc_device_nvidia); - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, devnum); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); - abort (); - } - - r = - cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - 
abort (); - } - - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } - r = cuModuleLoad (&module, "subr.ptx"); if (r != CUDA_SUCCESS) { @@ -66,10 +38,6 @@ main (int argc, char **argv) nbytes = sizeof (int); - ticks = (unsigned long) (200.0 * clkrate); - - N = nprocs; - streams = (CUstream *) malloc (N * sizeof (void *)); a = (unsigned long **) malloc (N * sizeof (unsigned long *)); @@ -103,8 +71,7 @@ main (int argc, char **argv) for (i = 0; i < N; i++) { kargs[0] = (void *) &d_a[i]; - kargs[1] = (void *) &ticks; - kargs[2] = (void *) &tid[i]; + kargs[1] = (void *) &tid[i]; r = cuLaunchKernel (delay2, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); if (r != CUDA_SUCCESS) @@ -112,8 +79,6 @@ main (int argc, char **argv) fprintf (stderr, "cuLaunchKernel failed: %d\n", r); abort (); } - - ticks = (unsigned long) (50.0 * clkrate); } acc_wait_all_async (0); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-93.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-93.c new file mode 100644 index 000000000000..bc60a16c64f1 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-93.c @@ -0,0 +1,19 @@ +/* { dg-do run { target { ! 
openacc_nvidia_accel_configured } } } */ + +#include +#include + +int +main (void) +{ + fprintf (stderr, "CheCKpOInT\n"); + acc_init (acc_device_nvidia); + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-output "CheCKpOInT(\n|\r\n|\r).*" } */ +/* { dg-output "device type nvidia not supported" } */ +/* { dg-shouldfail "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h b/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h index ca29f0889e4e..0c9096fd8e80 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h @@ -1,46 +1,24 @@ -#ifdef ACC_DEVICE_TYPE_nvidia - #pragma acc routine nohost -static int clock (void) -{ - int thetime; - - asm __volatile__ ("mov.u32 %0, %%clock;" : "=r"(thetime)); - - return thetime; -} - -#endif - void -delay (unsigned long *d_o, unsigned long delay) +delay () { - int start, ticks; + int i, sum = 0; /* counter for the busy loop below; starts at 0 because it was previously read before ever being written. NOTE(review): the checked-in subr.ptx still loads [%frame+4] without a prior store and should be regenerated to match */ + const int N = 500000; - start = clock (); - - ticks = 0; - - while (ticks < delay) - ticks = clock () - start; - - return; + for (i = 0; i < N; i++) + sum = sum + 1; } +#pragma acc routine nohost void -delay2 (unsigned long *d_o, unsigned long delay, unsigned long tid) +delay2 (unsigned long *d_o, unsigned long tid) { - int start, ticks; + int i, sum = 0; /* starts at 0 for the same reason as in delay: the loop below reads sum before any other write */ + const int N = 500000; - start = clock (); - - ticks = 0; - - while (ticks < delay) - ticks = clock () - start; + for (i = 0; i < N; i++) + sum = sum + 1; d_o[0] = tid; - - return; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.ptx b/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.ptx index 6f748fcaf9ef..88b63bfb74b3 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.ptx +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.ptx @@ -1,148 +1,90 @@ -// BEGIN PREAMBLE - .version 3.1 - .target sm_30 + .version 3.1 + .target sm_30 .address_size 64 -// END PREAMBLE -// BEGIN FUNCTION DEF: clock -.func (.param.u32 %out_retval)clock -{ -.reg.u32 %retval; - .reg.u64 %hr10; - 
.reg.u32 %r22; - .reg.u32 %r23; - .reg.u32 %r24; - .local.align 8 .b8 %frame[8]; - // #APP -// 7 "subr.c" 1 - mov.u32 %r24, %clock; -// 0 "" 2 - // #NO_APP - st.local.u32 [%frame], %r24; - ld.local.u32 %r22, [%frame]; - mov.u32 %r23, %r22; - mov.u32 %retval, %r23; - st.param.u32 [%out_retval], %retval; - ret; - } -// END FUNCTION DEF -// BEGIN GLOBAL FUNCTION DEF: delay -.visible .entry delay(.param.u64 %in_ar1, .param.u64 %in_ar2) -{ - .reg.u64 %ar1; - .reg.u64 %ar2; - .reg.u64 %hr10; - .reg.u64 %r22; - .reg.u32 %r23; - .reg.u64 %r24; - .reg.u64 %r25; - .reg.u32 %r26; - .reg.u32 %r27; - .reg.u32 %r28; - .reg.u32 %r29; - .reg.u32 %r30; - .reg.u64 %r31; - .reg.pred %r32; - .local.align 8 .b8 %frame[24]; - ld.param.u64 %ar1, [%in_ar1]; - ld.param.u64 %ar2, [%in_ar2]; - mov.u64 %r24, %ar1; - st.u64 [%frame+8], %r24; - mov.u64 %r25, %ar2; - st.local.u64 [%frame+16], %r25; + .visible .entry delay { - .param.u32 %retval_in; - { - call (%retval_in), clock; - } - ld.param.u32 %r26, [%retval_in]; -} - st.local.u32 [%frame+4], %r26; - mov.u32 %r27, 0; - st.local.u32 [%frame], %r27; - bra $L4; -$L5: - { - .param.u32 %retval_in; - { - call (%retval_in), clock; - } - ld.param.u32 %r28, [%retval_in]; -} - mov.u32 %r23, %r28; - ld.local.u32 %r30, [%frame+4]; - sub.u32 %r29, %r23, %r30; - st.local.u32 [%frame], %r29; -$L4: - ld.local.s32 %r22, [%frame]; - ld.local.u64 %r31, [%frame+16]; - setp.lo.u64 %r32,%r22,%r31; - @%r32 bra $L5; + .reg .u64 %hr10; + .reg .u32 %r22; + .reg .u32 %r23; + .reg .u32 %r24; + .reg .u32 %r25; + .reg .u32 %r26; + .reg .u32 %r27; + .reg .u32 %r28; + .reg .u32 %r29; + .reg .pred %r30; + .reg .u64 %frame; + .local .align 8 .b8 %farray[16]; + cvta.local.u64 %frame,%farray; + mov.u32 %r22,500000; + st.u32 [%frame+8],%r22; + mov.u32 %r23,0; + st.u32 [%frame],%r23; + bra $L2; + $L3: + ld.u32 %r25,[%frame+4]; + add.u32 %r24,%r25,1; + st.u32 [%frame+4],%r24; + ld.u32 %r27,[%frame]; + add.u32 %r26,%r27,1; + st.u32 [%frame],%r26; + $L2: + ld.u32 %r28,[%frame]; + 
ld.u32 %r29,[%frame+8]; + setp.lt.s32 %r30,%r28,%r29; + @%r30 + bra $L3; ret; } -// END FUNCTION DEF -// BEGIN GLOBAL FUNCTION DEF: delay2 -.visible .entry delay2(.param.u64 %in_ar1, .param.u64 %in_ar2, .param.u64 %in_ar3) -{ - .reg.u64 %ar1; - .reg.u64 %ar2; - .reg.u64 %ar3; - .reg.u64 %hr10; - .reg.u64 %r22; - .reg.u32 %r23; - .reg.u64 %r24; - .reg.u64 %r25; - .reg.u64 %r26; - .reg.u32 %r27; - .reg.u32 %r28; - .reg.u32 %r29; - .reg.u32 %r30; - .reg.u32 %r31; - .reg.u64 %r32; - .reg.pred %r33; - .reg.u64 %r34; - .reg.u64 %r35; - .local.align 8 .b8 %frame[32]; - ld.param.u64 %ar1, [%in_ar1]; - ld.param.u64 %ar2, [%in_ar2]; - ld.param.u64 %ar3, [%in_ar3]; - mov.u64 %r24, %ar1; - st.local.u64 [%frame+8], %r24; - mov.u64 %r25, %ar2; - st.local.u64 [%frame+16], %r25; - mov.u64 %r26, %ar3; - st.local.u64 [%frame+24], %r26; - { - .param.u32 %retval_in; - { - call (%retval_in), clock; - } - ld.param.u32 %r27, [%retval_in]; -} - st.local.u32 [%frame+4], %r27; - mov.u32 %r28, 0; - st.local.u32 [%frame], %r28; - bra $L8; -$L9: - { - .param.u32 %retval_in; + + .visible .entry delay2 (.param .u64 %in_ar1, .param .u64 %in_ar2) { - call (%retval_in), clock; - } - ld.param.u32 %r29, [%retval_in]; -} - mov.u32 %r23, %r29; - ld.local.u32 %r31, [%frame+4]; - sub.u32 %r30, %r23, %r31; - st.local.u32 [%frame], %r30; -$L8: - ld.local.s32 %r22, [%frame]; - ld.local.u64 %r32, [%frame+16]; - setp.lo.u64 %r33,%r22,%r32; - @%r33 bra $L9; - ld.local.u64 %r34, [%frame+8]; - ld.local.u64 %r35, [%frame+24]; - st.u64 [%r34], %r35; + .reg .u64 %ar1; + .reg .u64 %ar2; + .reg .u64 %hr10; + .reg .u64 %r22; + .reg .u64 %r23; + .reg .u32 %r24; + .reg .u32 %r25; + .reg .u32 %r26; + .reg .u32 %r27; + .reg .u32 %r28; + .reg .u32 %r29; + .reg .u32 %r30; + .reg .u32 %r31; + .reg .pred %r32; + .reg .u64 %r33; + .reg .u64 %r34; + .reg .u64 %frame; + .local .align 8 .b8 %farray[32]; + cvta.local.u64 %frame,%farray; + ld.param.u64 %ar1,[%in_ar1]; + ld.param.u64 %ar2,[%in_ar2]; + mov.u64 %r22,%ar1; + st.u64 
[%frame+16],%r22; + mov.u64 %r23,%ar2; + st.u64 [%frame+24],%r23; + mov.u32 %r24,500000; + st.u32 [%frame+8],%r24; + mov.u32 %r25,0; + st.u32 [%frame],%r25; + bra $L5; + $L6: + ld.u32 %r27,[%frame+4]; + add.u32 %r26,%r27,1; + st.u32 [%frame+4],%r26; + ld.u32 %r29,[%frame]; + add.u32 %r28,%r29,1; + st.u32 [%frame],%r28; + $L5: + ld.u32 %r30,[%frame]; + ld.u32 %r31,[%frame+8]; + setp.lt.s32 %r32,%r30,%r31; + @%r32 + bra $L6; + ld.u64 %r33,[%frame+16]; + ld.u64 %r34,[%frame+24]; + st.u64 [%r33],%r34; ret; } -// END FUNCTION DEF diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/timer.h b/libgomp/testsuite/libgomp.oacc-c-c++-common/timer.h index 53749da5a0de..e69de29bb2d1 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/timer.h +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/timer.h @@ -1,103 +0,0 @@ - -#include -#include - -static int _Tnum_timers; -static CUevent *_Tstart_events, *_Tstop_events; -static CUstream _Tstream; - -void -init_timers (int ntimers) -{ - int i; - CUresult r; - - _Tnum_timers = ntimers; - - _Tstart_events = (CUevent *) malloc (_Tnum_timers * sizeof (CUevent)); - _Tstop_events = (CUevent *) malloc (_Tnum_timers * sizeof (CUevent)); - - r = cuStreamCreate (&_Tstream, CU_STREAM_DEFAULT); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuStreamCreate failed: %d\n", r); - abort (); - } - - for (i = 0; i < _Tnum_timers; i++) - { - r = cuEventCreate (&_Tstart_events[i], CU_EVENT_DEFAULT); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuEventCreate failed: %d\n", r); - abort (); - } - - r = cuEventCreate (&_Tstop_events[i], CU_EVENT_DEFAULT); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuEventCreate failed: %d\n", r); - abort (); - } - } -} - -void -fini_timers (void) -{ - int i; - - for (i = 0; i < _Tnum_timers; i++) - { - cuEventDestroy (_Tstart_events[i]); - cuEventDestroy (_Tstop_events[i]); - } - - cuStreamDestroy (_Tstream); - - free (_Tstart_events); - free (_Tstop_events); -} - -void -start_timer (int timer) -{ - 
CUresult r; - - r = cuEventRecord (_Tstart_events[timer], _Tstream); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuEventRecord failed: %d\n", r); - abort (); - } -} - -float -stop_timer (int timer) -{ - CUresult r; - float etime; - - r = cuEventRecord (_Tstop_events[timer], _Tstream); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuEventRecord failed: %d\n", r); - abort (); - } - - r = cuEventSynchronize (_Tstop_events[timer]); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuEventSynchronize failed: %d\n", r); - abort (); - } - - r = cuEventElapsedTime (&etime, _Tstart_events[timer], _Tstop_events[timer]); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuEventElapsedTime failed: %d\n", r); - abort (); - } - - return etime; -} diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/data-3.f90 index 19eb4bd6a450..b5586be06090 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/data-3.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/data-3.f90 @@ -55,7 +55,8 @@ program asyncwait c(:) = 0.0 d(:) = 0.0 - !$acc enter data copyin (a(1:N)) create (b(1:N)) create (c(1:N)) create (d(1:N)) + !$acc enter data copyin (a(1:N)) create (b(1:N)) create (c(1:N)) & + !$acc& create (d(1:N)) !$acc parallel async (1) do i = 1, N @@ -76,7 +77,8 @@ program asyncwait !$acc end parallel !$acc wait (1) - !$acc exit data copyout (a(1:N)) copyout (b(1:N)) copyout (c(1:N)) copyout (d(1:N)) + !$acc exit data copyout (a(1:N)) copyout (b(1:N)) copyout (c(1:N)) & + !$acc& copyout (d(1:N)) do i = 1, N if (a(i) .ne. 
3.0) STOP 5 @@ -91,7 +93,8 @@ program asyncwait d(:) = 0.0 e(:) = 0.0 - !$acc enter data copyin (a(1:N)) create (b(1:N)) create (c(1:N)) create (d(1:N)) copyin (e(1:N)) + !$acc enter data copyin (a(1:N)) create (b(1:N)) create (c(1:N)) & + !$acc& create (d(1:N)) copyin (e(1:N)) !$acc parallel async (1) do i = 1, N @@ -118,7 +121,8 @@ program asyncwait !$acc end parallel !$acc wait (1) - !$acc exit data copyout (a(1:N)) copyout (b(1:N)) copyout (c(1:N)) copyout (d(1:N)) copyout (e(1:N)) + !$acc exit data copyout (a(1:N)) copyout (b(1:N)) copyout (c(1:N)) & + !$acc& copyout (d(1:N)) copyout (e(1:N)) !$acc exit data delete (N) do i = 1, N