git.ipfire.org Git - thirdparty/ccache.git/commitdiff
Bug fixes and tests for nvcc
author: Clemens Rabe <clemens.rabe@clemensrabe.de>
Wed, 23 Aug 2017 07:33:23 +0000 (09:33 +0200)
committer: Joel Rosdahl <joel@rosdahl.net>
Sat, 27 Jan 2018 20:24:41 +0000 (21:24 +0100)
Feature: Added cuda compiler in separate travis job and implemented
         tests for nvcc.

Feature: Added support for nvcc compiler options --compiler-bindir/-ccbin,
         --output-directory/-odir and --libdevice-directory/-ldir.
         Added tests for these options in test.sh.

Bug fix: The original patch had a statement to avoid using the preprocessed
         input files for nvcc when run_second_cpp is false. Without it,
         when a build is necessary and the preprocessed output is used,
         nvcc fails with a compiler error. The patch simply ensures
         run_second_cpp is always set to true for the cuda compiler.

Bug fix: The -optf and -odir options are only accepted for nvcc. For other
         compilers, they behave like '-o ptf' and '-o dir', respectively.
         A test was added to check this behaviour.

.travis.yml
.travis/install_cuda.sh [new file with mode: 0644]
ccache.c
compopt.c
test.sh

index 13ef60a28082c7c3f7e6ccbf47b9648601b18073..a4793bb8aad11d316f01818916574f4f40c6dae3 100644 (file)
@@ -1,5 +1,7 @@
 language: c
 
+sudo: required
+
 addons:
     apt:
         packages:
@@ -32,11 +34,17 @@ matrix:
         - os: linux
           compiler: clang
           env: PATH="/usr/bin:$PATH" TEST=analyze
+        - os: linux
+          compiler: gcc
+          env: CUDA=8.0.61-1
 
     exclude:
         - os: osx
           compiler: gcc
 
+before_install:
+    - source ./.travis/install_cuda.sh
+
 script:
     - ./autogen.sh
     - ./configure $HOST
diff --git a/.travis/install_cuda.sh b/.travis/install_cuda.sh
new file mode 100644 (file)
index 0000000..a8f141e
--- /dev/null
@@ -0,0 +1,28 @@
+#
+# Install CUDA.
+#
+# Version is given in CUDA variable. If left empty, this script does
+# nothing. As variables are exported by this script, "source" it
+# rather than executing it.
+#
+
+if [ -n "$CUDA" ]; then
+    echo "Installing CUDA support"
+    travis_retry wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-repo-ubuntu1404_${CUDA}_amd64.deb
+    travis_retry sudo dpkg -i cuda-repo-ubuntu1404_${CUDA}_amd64.deb
+    travis_retry sudo apt-get update -qq
+    export CUDA_APT=${CUDA:0:3}
+    export CUDA_APT=${CUDA_APT/./-}
+
+    travis_retry sudo apt-get install -y cuda-command-line-tools-${CUDA_APT}
+    travis_retry sudo apt-get clean
+
+    export CUDA_HOME=/usr/local/cuda-${CUDA:0:3}
+    export LD_LIBRARY_PATH=${CUDA_HOME}/nvvm/lib64:${LD_LIBRARY_PATH}
+    export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
+    export PATH=${CUDA_HOME}/bin:${PATH}
+
+    nvcc --version
+else
+    echo "CUDA is NOT installed."
+fi
index 8e1db287aa439e324c14eb0d1f76e3a3e28d6bdc..b9a41a5d7809b7e81d8187051bd7ac443127b09f 100644 (file)
--- a/ccache.c
+++ b/ccache.c
@@ -487,6 +487,15 @@ compiler_is_gcc(struct args *args)
        return result;
 }
 
+static bool
+compiler_is_nvcc(struct args *args)
+{
+       char *name = basename(args->argv[0]);
+       bool result = strstr(name, "nvcc") != NULL;
+       free(name);
+       return result;
+}
+
 static bool
 compiler_is_pump(struct args *args)
 {
@@ -2140,7 +2149,8 @@ cc_process_args(struct args *args, struct args **preprocessor_args,
                }
 
                // Handle cuda "-optf" and "--options-file" argument.
-               if (str_eq(argv[i], "-optf") || str_eq(argv[i], "--options-file")) {
+               if ((str_eq(argv[i], "-optf") || str_eq(argv[i], "--options-file"))
+                   && compiler_is_nvcc(args)) {
                        if (i == argc - 1) {
                                cc_log("Expected argument after %s", argv[i]);
                                stats_update(STATS_ARGS);
@@ -2275,8 +2285,8 @@ cc_process_args(struct args *args, struct args **preprocessor_args,
                        continue;
                }
 
-               // Alternate form of -o with no space.
-               if (str_startswith(argv[i], "-o")) {
+               // Alternate form of -o with no space. Nvcc does not support this.
+               if (str_startswith(argv[i], "-o") && !compiler_is_nvcc(args)) {
                        output_obj = make_relative_path(x_strdup(&argv[i][2]));
                        continue;
                }
@@ -2792,6 +2802,11 @@ cc_process_args(struct args *args, struct args **preprocessor_args,
                goto out;
        }
 
+       if (!conf->run_second_cpp && str_eq(actual_language, "cuda")) {
+               cc_log("Call cuda compiler with original input, not preprocessed input file.");
+               conf->run_second_cpp = true;
+       }
+
        direct_i_file = language_is_preprocessed(actual_language);
 
        if (output_is_precompiled_header && !conf->run_second_cpp) {
index 1d9a659f7249aa9709c2368070b88ad51924f78e..89976a4a8068261dd087d4bbb579befddd3aa53d 100644 (file)
--- a/compopt.c
+++ b/compopt.c
@@ -30,6 +30,9 @@ struct compopt {
 };
 
 static const struct compopt compopts[] = {
+       {"--compiler-bindir", AFFECTS_CPP | TAKES_ARG}, // nvcc
+       {"--libdevice-directory", AFFECTS_CPP | TAKES_ARG}, // nvcc
+       {"--output-directory", AFFECTS_CPP | TAKES_ARG}, // nvcc
        {"--param",         TAKES_ARG},
        {"--save-temps",    TOO_HARD},
        {"--serialize-diagnostics", TAKES_ARG | TAKES_PATH},
@@ -56,6 +59,7 @@ static const struct compopt compopts[] = {
        {"-arch",           TAKES_ARG},
        {"-aux-info",       TAKES_ARG},
        {"-b",              TAKES_ARG},
+       {"-ccbin",          AFFECTS_CPP | TAKES_ARG}, // nvcc
        {"-fmodules",       TOO_HARD},
        {"-fno-working-directory", AFFECTS_CPP},
        {"-fplugin=libcc1plugin", TOO_HARD}, // interaction with GDB
@@ -75,8 +79,10 @@ static const struct compopt compopts[] = {
        {"-iwithprefix",    AFFECTS_CPP | TAKES_ARG | TAKES_CONCAT_ARG | TAKES_PATH},
        {"-iwithprefixbefore",
         AFFECTS_CPP | TAKES_ARG | TAKES_CONCAT_ARG | TAKES_PATH},
+       {"-ldir",           AFFECTS_CPP | TAKES_ARG}, // nvcc
        {"-nostdinc",       AFFECTS_CPP},
        {"-nostdinc++",     AFFECTS_CPP},
+       {"-odir",           AFFECTS_CPP | TAKES_ARG}, // nvcc
        {"-remap",          AFFECTS_CPP},
        {"-save-temps",     TOO_HARD},
        {"-stdlib=",        AFFECTS_CPP | TAKES_CONCAT_ARG},
diff --git a/test.sh b/test.sh
index af569204712dd3fe14189ea3723d65c88ee8b3f2..8d3779119891335117b38cad788a084d67f6fb5e 100755 (executable)
--- a/test.sh
+++ b/test.sh
@@ -122,6 +122,18 @@ expect_equal_files() {
     fi
 }
 
+expect_different_files() {
+    if [ ! -e "$1" ]; then
+        test_failed "compare_files: $1 missing"
+    fi
+    if [ ! -e "$2" ]; then
+        test_failed "compare_files: $2 missing"
+    fi
+    if cmp -s "$1" "$2"; then
+        test_failed "compare_files:: $1 and $2 are identical"
+    fi
+}
+
 expect_equal_object_files() {
     if $HOST_OS_LINUX && $COMPILER_TYPE_CLANG; then
         if ! which eu-elfcmp >/dev/null 2>&1; then
@@ -271,6 +283,25 @@ base_tests() {
     $REAL_COMPILER -c -o reference_test1.o test1.c
     expect_equal_object_files reference_test1.o foo.o
 
+    # -------------------------------------------------------------------------
+    TEST "Output option without space"
+
+    $CCACHE_COMPILE -c test1.c
+    expect_stat 'cache hit (preprocessed)' 0
+    expect_stat 'cache miss' 1
+
+    $CCACHE_COMPILE -c test1.c -odir
+    expect_stat 'cache hit (preprocessed)' 1
+    expect_stat 'cache miss' 1
+
+    $CCACHE_COMPILE -c test1.c -optf
+    expect_stat 'cache hit (preprocessed)' 2
+    expect_stat 'cache miss' 1
+
+    $REAL_COMPILER -c -o reference_test1.o test1.c
+    expect_equal_object_files reference_test1.o dir
+    expect_equal_object_files reference_test1.o ptf
+
     # -------------------------------------------------------------------------
     TEST "Called for link"
 
@@ -3440,6 +3471,561 @@ SUITE_input_charset() {
     expect_stat 'cache miss' 2
 }
 
+# =============================================================================
+nvcc_PROBE() {
+    if [ -z "$REAL_NVCC" ]; then
+        echo "nvcc is not available"
+    elif [ -z "$REAL_CUOBJDUMP" ]; then
+       echo "cuobjdump is not available"
+    fi
+}
+
+nvcc_SETUP() {
+    # Test code using only c++ (option --x c++). Faster than compiling cuda.
+    cat <<EOF > test_cpp.cu
+#ifndef NUM
+#define NUM 10000
+#endif
+
+void caller() {
+  for (int i=0; i<NUM; ++i);
+}
+EOF
+
+    # Option files to modify the define
+    cat <<EOF >test1.optf
+-DNUM=1
+EOF
+    cat <<EOF >test2.optf
+-DNUM=2
+EOF
+
+    # Test code using cuda.
+    cat <<EOF >test_cuda.cu
+#ifndef NUM
+#define NUM 10000
+#endif
+
+__global__
+void add(int* a, int* b) {
+  int i = blockIdx.x;
+  if (i<NUM) {
+    b[i] = 2*a[i];
+  }
+}
+
+void caller() {
+  add<<<NUM, 1>>>(NULL,NULL);
+}
+EOF
+}
+
+nvcc_tests() {
+    # Reference file testing was not successful due to different "fatbin" data.
+    # Another source of differences is the temporary files created by nvcc,
+    # which can be avoided by using the options '--keep --keep-dir ./keep'.
+    # So instead of comparing the binary object files, we compare the dumps of
+    # cuobjdump -all -elf -symbols -ptx -sass test1.o
+    NVCC_OPTS_CPP="-Wno-deprecated-gpu-targets -c --x c++"
+    NVCC_OPTS_CUDA="-Wno-deprecated-gpu-targets -c"
+    NVCC_OPTS_GPU1="--generate-code arch=compute_50,code=compute_50"
+    NVCC_OPTS_GPU2="--generate-code arch=compute_52,code=sm_52"
+    CCACHE_NVCC_CPP="$CCACHE $REAL_NVCC $NVCC_OPTS_CPP"
+    CCACHE_NVCC_CUDA="$CCACHE $REAL_NVCC $NVCC_OPTS_CUDA"
+    CUOBJDUMP="$REAL_CUOBJDUMP -all -elf -symbols -ptx -sass"
+
+    # -------------------------------------------------------------------------
+    TEST "simple mode"
+
+    $REAL_NVCC $NVCC_OPTS_CPP -o reference_test1.o test_cpp.cu
+
+    # First compile
+    $CCACHE_NVCC_CPP test_cpp.cu
+    expect_stat 'cache hit (preprocessed)' 0
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 1
+    expect_equal_files reference_test1.o test_cpp.o
+
+    $CCACHE_NVCC_CPP test_cpp.cu
+    expect_stat 'cache hit (preprocessed)' 1
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 1
+    expect_equal_files reference_test1.o test_cpp.o
+
+    # -------------------------------------------------------------------------
+    TEST "different GPU architectures"
+
+    $REAL_NVCC $NVCC_OPTS_CUDA                 -o reference_test1.o test_cuda.cu
+    $REAL_NVCC $NVCC_OPTS_CUDA $NVCC_OPTS_GPU1 -o reference_test2.o test_cuda.cu
+    $REAL_NVCC $NVCC_OPTS_CUDA $NVCC_OPTS_GPU2 -o reference_test3.o test_cuda.cu
+    $CUOBJDUMP reference_test1.o > reference_test1.dump
+    $CUOBJDUMP reference_test2.o > reference_test2.dump
+    $CUOBJDUMP reference_test3.o > reference_test3.dump
+    expect_different_files reference_test1.dump reference_test2.dump
+    expect_different_files reference_test1.dump reference_test3.dump
+    expect_different_files reference_test2.dump reference_test3.dump
+
+    $CCACHE_NVCC_CUDA test_cuda.cu
+    expect_stat 'cache hit (preprocessed)' 0
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 1
+    $CUOBJDUMP test_cuda.o > test1.dump
+    expect_equal_files reference_test1.dump test1.dump
+
+    # Other GPU
+    $CCACHE_NVCC_CUDA $NVCC_OPTS_GPU1 test_cuda.cu
+    expect_stat 'cache hit (preprocessed)' 0
+    expect_stat 'cache miss' 2
+    expect_stat 'files in cache' 2
+    $CUOBJDUMP test_cuda.o > test1.dump
+    expect_equal_files reference_test2.dump test1.dump
+
+    $CCACHE_NVCC_CUDA $NVCC_OPTS_GPU1 test_cuda.cu
+    expect_stat 'cache hit (preprocessed)' 1
+    expect_stat 'cache miss' 2
+    expect_stat 'files in cache' 2
+    $CUOBJDUMP test_cuda.o > test1.dump
+    expect_equal_files reference_test2.dump test1.dump
+
+    # Another GPU
+    $CCACHE_NVCC_CUDA $NVCC_OPTS_GPU2 test_cuda.cu
+    expect_stat 'cache hit (preprocessed)' 1
+    expect_stat 'cache miss' 3
+    expect_stat 'files in cache' 3
+    $CUOBJDUMP test_cuda.o > test1.dump
+    expect_equal_files reference_test3.dump test1.dump
+
+    $CCACHE_NVCC_CUDA $NVCC_OPTS_GPU2 test_cuda.cu
+    expect_stat 'cache hit (preprocessed)' 2
+    expect_stat 'cache miss' 3
+    expect_stat 'files in cache' 3
+    $CUOBJDUMP test_cuda.o > test1.dump
+    expect_equal_files reference_test3.dump test1.dump
+
+    # -------------------------------------------------------------------------
+    TEST "different defines"
+
+    $REAL_NVCC $NVCC_OPTS_CPP            -o reference_test1.o test_cpp.cu
+    $REAL_NVCC $NVCC_OPTS_CPP -DNUM=10   -o reference_test2.o test_cpp.cu
+    expect_different_files reference_test1.o reference_test2.o
+
+    $CCACHE_NVCC_CPP test_cpp.cu
+    expect_stat 'cache hit (preprocessed)' 0
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 1
+    expect_equal_files reference_test1.o test_cpp.o
+
+    # Specified define, but unused. Can only be found by preprocessed mode
+    $CCACHE_NVCC_CPP -DDUMMYENV=1 test_cpp.cu
+    expect_stat "cache hit (preprocessed)" 1
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 1
+    expect_equal_files reference_test1.o test_cpp.o
+
+    # Specified used define
+    $CCACHE_NVCC_CPP -DNUM=10 test_cpp.cu
+    expect_stat "cache hit (preprocessed)" 1
+    expect_stat 'cache miss' 2
+    expect_stat 'files in cache' 2
+    expect_equal_files reference_test2.o test_cpp.o
+
+    $CCACHE_NVCC_CPP -DNUM=10 test_cpp.cu
+    expect_stat 'cache hit (preprocessed)' 2
+    expect_stat 'cache miss' 2
+    expect_stat 'files in cache' 2
+    expect_equal_files reference_test2.o test_cpp.o
+
+    # -------------------------------------------------------------------------
+    TEST "option file"
+
+    $REAL_NVCC $NVCC_OPTS_CPP -optf test1.optf -o reference_test1.o test_cpp.cu
+    $REAL_NVCC $NVCC_OPTS_CPP -optf test2.optf -o reference_test2.o test_cpp.cu
+    expect_different_files reference_test1.o reference_test2.o
+
+    $CCACHE_NVCC_CPP -optf test1.optf test_cpp.cu
+    expect_stat 'cache hit (preprocessed)' 0
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 1
+    expect_equal_files reference_test1.o test_cpp.o
+
+    $CCACHE_NVCC_CPP -optf test1.optf test_cpp.cu
+    expect_stat 'cache hit (preprocessed)' 1
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 1
+    expect_equal_files reference_test1.o test_cpp.o
+
+    $CCACHE_NVCC_CPP -optf test2.optf test_cpp.cu
+    expect_stat 'cache hit (preprocessed)' 1
+    expect_stat 'cache miss' 2
+    expect_stat 'files in cache' 2
+    expect_equal_files reference_test2.o test_cpp.o
+
+    $CCACHE_NVCC_CPP -optf test2.optf test_cpp.cu
+    expect_stat 'cache hit (preprocessed)' 2
+    expect_stat 'cache miss' 2
+    expect_stat 'files in cache' 2
+    expect_equal_files reference_test2.o test_cpp.o
+
+    # -------------------------------------------------------------------------
+    TEST "option --compiler-bindir"
+
+    $REAL_NVCC $NVCC_OPTS_CPP --compiler-bindir $REAL_COMPILER \
+                  -o reference_test1.o test_cpp.cu
+
+    # First compile
+    $CCACHE_NVCC_CPP --compiler-bindir $REAL_COMPILER test_cpp.cu
+    expect_stat 'cache hit (preprocessed)' 0
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 1
+    expect_equal_files reference_test1.o test_cpp.o
+
+    $CCACHE_NVCC_CPP --compiler-bindir $REAL_COMPILER test_cpp.cu
+    expect_stat 'cache hit (preprocessed)' 1
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 1
+    expect_equal_files reference_test1.o test_cpp.o
+
+    # -------------------------------------------------------------------------
+    TEST "option -ccbin"
+
+    $REAL_NVCC $NVCC_OPTS_CPP -ccbin $REAL_COMPILER \
+                  -o reference_test1.o test_cpp.cu
+
+    # First compile
+    $CCACHE_NVCC_CPP -ccbin $REAL_COMPILER test_cpp.cu
+    expect_stat 'cache hit (preprocessed)' 0
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 1
+    expect_equal_files reference_test1.o test_cpp.o
+
+    $CCACHE_NVCC_CPP -ccbin $REAL_COMPILER test_cpp.cu
+    expect_stat 'cache hit (preprocessed)' 1
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 1
+    expect_equal_files reference_test1.o test_cpp.o
+
+    # -------------------------------------------------------------------------
+    TEST "option --output-directory"
+
+    $REAL_NVCC $NVCC_OPTS_CPP --output-directory . \
+                  -o reference_test1.o test_cpp.cu
+
+    # First compile
+    $CCACHE_NVCC_CPP --output-directory . test_cpp.cu
+    expect_stat 'cache hit (preprocessed)' 0
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 1
+    expect_equal_files reference_test1.o test_cpp.o
+
+    $CCACHE_NVCC_CPP --output-directory . test_cpp.cu
+    expect_stat 'cache hit (preprocessed)' 1
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 1
+    expect_equal_files reference_test1.o test_cpp.o
+
+    # -------------------------------------------------------------------------
+    TEST "option -odir"
+
+    $REAL_NVCC $NVCC_OPTS_CPP -odir . \
+                  -o reference_test1.o test_cpp.cu
+
+    # First compile
+    $CCACHE_NVCC_CPP -odir . test_cpp.cu
+    expect_stat 'cache hit (preprocessed)' 0
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 1
+    expect_equal_files reference_test1.o test_cpp.o
+
+    $CCACHE_NVCC_CPP -odir . test_cpp.cu
+    expect_stat 'cache hit (preprocessed)' 1
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 1
+    expect_equal_files reference_test1.o test_cpp.o
+}
+
+# =============================================================================
+
+SUITE_nvcc_PROBE() {
+    nvcc_PROBE
+}
+
+SUITE_nvcc_SETUP() {
+    nvcc_SETUP
+}
+
+SUITE_nvcc() {
+    nvcc_tests
+}
+
+# =============================================================================
+
+SUITE_nvcc_ldir_PROBE() {
+    if [ -z "$REAL_NVCC" ]; then
+        echo "nvcc is not available"
+       return
+    elif [ -z "$REAL_CUOBJDUMP" ]; then
+       echo "cuobjdump is not available"
+       return
+    fi
+
+    NVCC_DIR=$(dirname $REAL_NVCC)
+    NVCC_LDIR=$NVCC_DIR/../nvvm/libdevice
+    CICC_PATH=$NVCC_DIR/../nvvm/bin
+    NVCC_IDIR=$NVCC_DIR/../include
+    # Workaround for canonical ubuntu package
+    [ ! -d $NVCC_LDIR ] && NVCC_LDIR=/usr/lib/nvidia-cuda-toolkit/libdevice
+    [ ! -d $CICC_PATH ] && CICC_PATH=/usr/lib/nvidia-cuda-toolkit/bin
+    [ ! -d $NVCC_IDIR ] && NVCC_IDIR=/usr/include
+    if [ ! -d $NVCC_LDIR ]; then
+       echo "libdevice directory $NVCC_LDIR not found"
+    elif [ ! -d $CICC_PATH ]; then
+       echo "path $CICC_PATH not found"
+    elif [ ! -d $NVCC_IDIR ]; then
+       echo "include directory $NVCC_IDIR not found"
+    fi
+}
+
+SUITE_nvcc_ldir_SETUP() {
+    nvcc_SETUP
+}
+
+SUITE_nvcc_ldir() {
+    NVCC_OPTS_CUDA="-Wno-deprecated-gpu-targets -c"
+    CCACHE_NVCC_CUDA="$CCACHE $REAL_NVCC $NVCC_OPTS_CUDA"
+    CUOBJDUMP="$REAL_CUOBJDUMP -all -elf -symbols -ptx -sass"
+    NVCC_DIR=$(dirname $REAL_NVCC)
+    NVCC_LDIR=$NVCC_DIR/../nvvm/libdevice
+    CICC_PATH=$NVCC_DIR/../nvvm/bin
+    NVCC_IDIR=$NVCC_DIR/../include
+    # Workaround for canonical ubuntu package
+    [ ! -d $NVCC_LDIR ] && NVCC_LDIR=/usr/lib/nvidia-cuda-toolkit/libdevice
+    [ ! -d $CICC_PATH ] && CICC_PATH=/usr/lib/nvidia-cuda-toolkit/bin
+    [ ! -d $NVCC_IDIR ] && NVCC_IDIR=/usr/include
+
+    TEST "option --libdevice-directory"
+
+    OLD_PATH=$PATH
+    TEST_OPTS="--libdevice-directory $NVCC_LDIR -I $NVCC_IDIR --dont-use-profile"
+    export PATH=$PATH:$CICC_PATH
+
+    $REAL_NVCC $NVCC_OPTS_CUDA $TEST_OPTS -o reference_test1.o test_cuda.cu
+    $CUOBJDUMP reference_test1.o > reference_test1.dump
+
+    # First compile
+    $CCACHE_NVCC_CUDA $TEST_OPTS test_cuda.cu
+    expect_stat 'cache hit (preprocessed)' 0
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 1
+    $CUOBJDUMP test_cuda.o > test1.dump
+    expect_equal_files reference_test1.dump test1.dump
+
+    $CCACHE_NVCC_CUDA $TEST_OPTS test_cuda.cu
+    expect_stat 'cache hit (preprocessed)' 1
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 1
+    $CUOBJDUMP test_cuda.o > test1.dump
+    expect_equal_files reference_test1.dump test1.dump
+
+    # ---------------------------------------------------------------------
+    TEST "option -ldir"
+
+    TEST_OPTS="-ldir $NVCC_LDIR -I $NVCC_IDIR --dont-use-profile"
+    $REAL_NVCC $NVCC_OPTS_CUDA $TEST_OPTS -o reference_test1.o test_cuda.cu
+    $CUOBJDUMP reference_test1.o > reference_test1.dump
+
+    # First compile
+    $CCACHE_NVCC_CUDA $TEST_OPTS test_cuda.cu
+    expect_stat 'cache hit (preprocessed)' 0
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 1
+    $CUOBJDUMP test_cuda.o > test1.dump
+    expect_equal_files reference_test1.dump test1.dump
+
+    $CCACHE_NVCC_CUDA $TEST_OPTS test_cuda.cu
+    expect_stat 'cache hit (preprocessed)' 1
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 1
+    $CUOBJDUMP test_cuda.o > test1.dump
+    expect_equal_files reference_test1.dump test1.dump
+
+    export PATH=$OLD_PATH
+}
+
+# =============================================================================
+
+SUITE_nvcc_nocpp2_PROBE() {
+    nvcc_PROBE
+}
+
+SUITE_nvcc_nocpp2_SETUP() {
+    export CCACHE_NOCPP2=1
+    nvcc_SETUP
+}
+
+SUITE_nvcc_nocpp2() {
+    nvcc_tests
+}
+
+# =============================================================================
+
+SUITE_nvcc_direct_PROBE() {
+    nvcc_PROBE
+}
+
+SUITE_nvcc_direct_SETUP() {
+    unset CCACHE_NODIRECT
+
+    nvcc_SETUP
+}
+
+SUITE_nvcc_direct() {
+    # Reference file testing was not successful due to different "fatbin" data.
+    # Another source of differences is the temporary files created by nvcc,
+    # which can be avoided by using the options '--keep --keep-dir ./keep'.
+    # So instead of comparing the binary object files, we compare the dumps of
+    # cuobjdump -all -elf -symbols -ptx -sass test1.o
+    NVCC_OPTS_CPP="-Wno-deprecated-gpu-targets -c --x c++"
+    NVCC_OPTS_CUDA="-Wno-deprecated-gpu-targets -c"
+    NVCC_OPTS_GPU1="--generate-code arch=compute_50,code=compute_50"
+    NVCC_OPTS_GPU2="--generate-code arch=compute_52,code=sm_52"
+    CCACHE_NVCC_CPP="$CCACHE $REAL_NVCC $NVCC_OPTS_CPP"
+    CCACHE_NVCC_CUDA="$CCACHE $REAL_NVCC $NVCC_OPTS_CUDA"
+    CUOBJDUMP="$REAL_CUOBJDUMP -all -elf -symbols -ptx -sass"
+
+    # -------------------------------------------------------------------------
+    TEST "simple mode"
+
+    $REAL_NVCC $NVCC_OPTS_CPP -o reference_test1.o test_cpp.cu
+
+    # First compile
+    $CCACHE_NVCC_CPP test_cpp.cu
+    expect_stat 'cache hit (direct)' 0
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 2
+    expect_equal_files reference_test1.o test_cpp.o
+
+    $CCACHE_NVCC_CPP test_cpp.cu
+    expect_stat 'cache hit (direct)' 1
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 2
+    expect_equal_files reference_test1.o test_cpp.o
+
+    # -------------------------------------------------------------------------
+    TEST "different GPU architectures"
+
+    $REAL_NVCC $NVCC_OPTS_CUDA                 -o reference_test1.o test_cuda.cu
+    $REAL_NVCC $NVCC_OPTS_CUDA $NVCC_OPTS_GPU1 -o reference_test2.o test_cuda.cu
+    $REAL_NVCC $NVCC_OPTS_CUDA $NVCC_OPTS_GPU2 -o reference_test3.o test_cuda.cu
+    $CUOBJDUMP reference_test1.o > reference_test1.dump
+    $CUOBJDUMP reference_test2.o > reference_test2.dump
+    $CUOBJDUMP reference_test3.o > reference_test3.dump
+    expect_different_files reference_test1.dump reference_test2.dump
+    expect_different_files reference_test1.dump reference_test3.dump
+    expect_different_files reference_test2.dump reference_test3.dump
+
+    $CCACHE_NVCC_CUDA test_cuda.cu
+    expect_stat 'cache hit (direct)' 0
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 2
+    $CUOBJDUMP test_cuda.o > test1.dump
+    expect_equal_files reference_test1.dump test1.dump
+
+    # Other GPU
+    $CCACHE_NVCC_CUDA $NVCC_OPTS_GPU1 test_cuda.cu
+    expect_stat 'cache hit (direct)' 0
+    expect_stat 'cache miss' 2
+    expect_stat 'files in cache' 4
+    $CUOBJDUMP test_cuda.o > test1.dump
+    expect_equal_files reference_test2.dump test1.dump
+
+    $CCACHE_NVCC_CUDA $NVCC_OPTS_GPU1 test_cuda.cu
+    expect_stat 'cache hit (direct)' 1
+    expect_stat 'cache miss' 2
+    expect_stat 'files in cache' 4
+    $CUOBJDUMP test_cuda.o > test1.dump
+    expect_equal_files reference_test2.dump test1.dump
+
+    # Another GPU
+    $CCACHE_NVCC_CUDA $NVCC_OPTS_GPU2 test_cuda.cu
+    expect_stat 'cache hit (direct)' 1
+    expect_stat 'cache miss' 3
+    expect_stat 'files in cache' 6
+    $CUOBJDUMP test_cuda.o > test1.dump
+    expect_equal_files reference_test3.dump test1.dump
+
+    $CCACHE_NVCC_CUDA $NVCC_OPTS_GPU2 test_cuda.cu
+    expect_stat 'cache hit (direct)' 2
+    expect_stat 'cache miss' 3
+    expect_stat 'files in cache' 6
+    $CUOBJDUMP test_cuda.o > test1.dump
+    expect_equal_files reference_test3.dump test1.dump
+
+    # -------------------------------------------------------------------------
+    TEST "different defines"
+
+    $REAL_NVCC $NVCC_OPTS_CPP            -o reference_test1.o test_cpp.cu
+    $REAL_NVCC $NVCC_OPTS_CPP -DNUM=10   -o reference_test2.o test_cpp.cu
+    expect_different_files reference_test1.o reference_test2.o
+
+    $CCACHE_NVCC_CPP test_cpp.cu
+    expect_stat 'cache hit (direct)' 0
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 2
+    expect_equal_files reference_test1.o test_cpp.o
+
+    # Specified define, but unused. Can only be found by preprocessed mode
+    $CCACHE_NVCC_CPP -DDUMMYENV=1 test_cpp.cu
+    expect_stat "cache hit (preprocessed)" 1
+    expect_stat "cache hit (direct)" 0
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 3
+    expect_equal_files reference_test1.o test_cpp.o
+
+    # Specified used define
+    $CCACHE_NVCC_CPP -DNUM=10 test_cpp.cu
+    expect_stat "cache hit (direct)" 0
+    expect_stat 'cache miss' 2
+    expect_stat 'files in cache' 5
+    expect_equal_files reference_test2.o test_cpp.o
+
+    $CCACHE_NVCC_CPP -DNUM=10 test_cpp.cu
+    expect_stat 'cache hit (direct)' 1
+    expect_stat 'cache miss' 2
+    expect_stat 'files in cache' 5
+    expect_equal_files reference_test2.o test_cpp.o
+
+    # -------------------------------------------------------------------------
+    TEST "option file"
+
+    $REAL_NVCC $NVCC_OPTS_CPP -optf test1.optf -o reference_test1.o test_cpp.cu
+    $REAL_NVCC $NVCC_OPTS_CPP -optf test2.optf -o reference_test2.o test_cpp.cu
+    expect_different_files reference_test1.o reference_test2.o
+
+    $CCACHE_NVCC_CPP -optf test1.optf test_cpp.cu
+    expect_stat 'cache hit (direct)' 0
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 2
+    expect_equal_files reference_test1.o test_cpp.o
+
+    $CCACHE_NVCC_CPP -optf test1.optf test_cpp.cu
+    expect_stat 'cache hit (direct)' 1
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 2
+    expect_equal_files reference_test1.o test_cpp.o
+
+    $CCACHE_NVCC_CPP -optf test2.optf test_cpp.cu
+    expect_stat 'cache hit (direct)' 1
+    expect_stat 'cache miss' 2
+    expect_stat 'files in cache' 4
+    expect_equal_files reference_test2.o test_cpp.o
+
+    $CCACHE_NVCC_CPP -optf test2.optf test_cpp.cu
+    expect_stat 'cache hit (direct)' 2
+    expect_stat 'cache miss' 2
+    expect_stat 'files in cache' 4
+    expect_equal_files reference_test2.o test_cpp.o
+}
+
 # =============================================================================
 # main program
 
@@ -3451,6 +4037,14 @@ EOF
     exit 1
 fi
 
+# Remove common ccache directories on host from PATH variable
+HOST_CCACHE_DIRS="/usr/lib/ccache/bin
+/usr/lib/ccache"
+for HOST_CCACHE_DIR in $HOST_CCACHE_DIRS; do
+    PATH=$(echo -n $PATH | awk -v RS=: -v ORS=: '$0 != "'$HOST_CCACHE_DIR'"' | sed 's/:$//')
+done
+export PATH
+
 if [ -n "$CC" ]; then
     COMPILER="$CC"
 else
@@ -3536,6 +4130,7 @@ else
     SYSROOT=
 fi
 
+
 # ---------------------------------------
 
 TESTDIR=testdir.$$
@@ -3547,6 +4142,10 @@ cd $TESTDIR || exit 1
 # ---------------------------------------
 
 all_suites="
+nvcc
+nvcc_direct
+nvcc_ldir
+nvcc_nocpp2
 base
 nocpp2
 multi_arch
@@ -3576,6 +4175,14 @@ else
     echo "Compiler:         $COMPILER ($REAL_COMPILER)"
 fi
 echo "Compiler version: $($COMPILER --version | head -n 1)"
+
+REAL_NVCC=$(find_compiler nvcc)
+REAL_CUOBJDUMP=$(find_compiler cuobjdump)
+if [ -n "$REAL_NVCC" ]; then
+    echo "CUDA Compiler:    $($REAL_NVCC --version | tail -n 1) ($REAL_NVCC)"
+else
+    echo "CUDA Compiler:    not available"
+fi
 echo
 
 VERBOSE=false