test/Driver/cuda-options.cu

   1 // Tests CUDA compilation pipeline construction in Driver.
   2 // REQUIRES: clang-driver
   3 // REQUIRES: x86-registered-target
   4 // REQUIRES: nvptx-registered-target
   5
   6 // Simple compilation case (-c). Compile device-side to PTX assembly and make
   7 // sure we use it on the host side; no link job may be created.
   8 // RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \
   9 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
  10 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
  11 // RUN:    -check-prefix NOLINK %s
  12
  13 // Typical compilation + link case. Same pipeline as above, except that the
     // host-side output must also be handed to the linker.
  14 // RUN: %clang -### -target x86_64-linux-gnu %s 2>&1 \
  15 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
  16 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
  17 // RUN:    -check-prefix LINK %s
  18
  19 // Verify that --cuda-host-only disables device-side compilation (and GPU
  20 // binary inclusion), but doesn't disable host-side compilation/linking.
  21 // RUN: %clang -### -target x86_64-linux-gnu --cuda-host-only %s 2>&1 \
  22 // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
  23 // RUN:    -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
  24
  25 // Verify that --cuda-device-only disables host-side compilation and linking
     // while device-side compilation still takes place.
  26 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only %s 2>&1 \
  27 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
  28 // RUN:    -check-prefix NOHOST -check-prefix NOLINK %s
  29
  30 // Check that the last of --cuda-compile-host-device, --cuda-host-only, and
  31 // --cuda-device-only wins.
  32
     // --cuda-device-only followed by --cuda-host-only => host-only pipeline.
  33 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
  34 // RUN:    --cuda-host-only %s 2>&1 \
  35 // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
  36 // RUN:    -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
  37
     // --cuda-compile-host-device followed by --cuda-host-only => host-only
     // pipeline.
  38 // RUN: %clang -### -target x86_64-linux-gnu --cuda-compile-host-device \
  39 // RUN:    --cuda-host-only %s 2>&1 \
  40 // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
  41 // RUN:    -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
  42
     // --cuda-host-only followed by --cuda-device-only => device-only pipeline.
  43 // RUN: %clang -### -target x86_64-linux-gnu --cuda-host-only \
  44 // RUN:    --cuda-device-only %s 2>&1 \
  45 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
  46 // RUN:    -check-prefix NOHOST -check-prefix NOLINK %s
  47
     // --cuda-compile-host-device followed by --cuda-device-only => device-only
     // pipeline.
  48 // RUN: %clang -### -target x86_64-linux-gnu --cuda-compile-host-device \
  49 // RUN:    --cuda-device-only %s 2>&1 \
  50 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
  51 // RUN:    -check-prefix NOHOST -check-prefix NOLINK %s
  52
     // --cuda-host-only followed by --cuda-compile-host-device => full host +
     // device pipeline, including the link step.
  53 // RUN: %clang -### -target x86_64-linux-gnu --cuda-host-only \
  54 // RUN:   --cuda-compile-host-device %s 2>&1 \
  55 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
  56 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
  57 // RUN:    -check-prefix LINK %s
  58
     // --cuda-device-only followed by --cuda-compile-host-device => full host +
     // device pipeline, including the link step.
  59 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
  60 // RUN:   --cuda-compile-host-device %s 2>&1 \
  61 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
  62 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
  63 // RUN:    -check-prefix LINK %s
  64
  65 // Verify that the --cuda-gpu-arch option passes the requested GPU
  66 // architecture to device compilation.
  67 // RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_30 -c %s 2>&1 \
  68 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
  69 // RUN:    -check-prefix DEVICE-SM30 -check-prefix HOST \
  70 // RUN:    -check-prefix INCLUDES-DEVICE -check-prefix NOLINK %s
  71
  72 // Verify that there is one device-side compilation per --cuda-gpu-arch
  73 // argument and that all results are included on the host side.
  74 // RUN: %clang -### -target x86_64-linux-gnu \
  75 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 -c %s 2>&1 \
  76 // RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,DEVICE2 \
  77 // RUN:             -check-prefixes DEVICE-SM30,DEVICE2-SM35 \
  78 // RUN:             -check-prefixes INCLUDES-DEVICE,INCLUDES-DEVICE2 \
  79 // RUN:             -check-prefixes HOST,HOST-NOSAVE,NOLINK %s
  80
  81 // Verify that device-side results are passed to the correct tool when
  82 // -save-temps is used (separate preprocessor and compiler jobs per side).
  83 // RUN: %clang -### -target x86_64-linux-gnu -save-temps -c %s 2>&1 \
  84 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-SAVE \
  85 // RUN:    -check-prefix HOST -check-prefix HOST-SAVE -check-prefix NOLINK %s
  86
  87 // Verify that device-side results are passed to the correct tool when
  88 // -fno-integrated-as is used (host output goes to an external assembler).
  89 // RUN: %clang -### -target x86_64-linux-gnu -fno-integrated-as -c %s 2>&1 \
  90 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
  91 // RUN:    -check-prefix HOST -check-prefix HOST-NOSAVE \
  92 // RUN:    -check-prefix HOST-AS -check-prefix NOLINK %s
  93
  94 // Verify that --[no-]cuda-gpu-arch arguments are handled correctly.
  95 // a) --no-cuda-gpu-arch=X negates a preceding --cuda-gpu-arch=X
  96 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
  97 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
  98 // RUN:   --no-cuda-gpu-arch=sm_35 \
  99 // RUN:   -c %s 2>&1 \
 100 // RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,NOARCH-SM35 %s
 101
 102 // b) --no-cuda-gpu-arch=X negates more than one preceding --cuda-gpu-arch=X
 103 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
 104 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
 105 // RUN:   --no-cuda-gpu-arch=sm_35 \
 106 // RUN:   -c %s 2>&1 \
 107 // RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,NOARCH-SM35 %s
 108
 109 // c) if the --no-cuda-gpu-arch=X arguments negate every preceding
 110 //    --cuda-gpu-arch=X, we default to sm_20 -- same as if none were passed.
 111 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
 112 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
 113 // RUN:   --no-cuda-gpu-arch=sm_35 --no-cuda-gpu-arch=sm_30 \
 114 // RUN:   -c %s 2>&1 \
 115 // RUN: | FileCheck -check-prefixes ARCH-SM20,NOARCH-SM30,NOARCH-SM35 %s
 116
 117 // d) --no-cuda-gpu-arch=X is a no-op if there's no preceding --cuda-gpu-arch=X
     //    (sm_50 was never requested, so sm_30 and sm_35 must both be compiled).
 118 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
 119 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
 120 // RUN:   --no-cuda-gpu-arch=sm_50 \
 121 // RUN:   -c %s 2>&1 \
 122 // RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,ARCH-SM35 %s
 123
 124 // e) --no-cuda-gpu-arch=X does not affect a following --cuda-gpu-arch=X
 125 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
 126 // RUN:   --no-cuda-gpu-arch=sm_35 --no-cuda-gpu-arch=sm_30 \
 127 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
 128 // RUN:   -c %s 2>&1 \
 129 // RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,ARCH-SM35 %s
 130
 131 // f) --no-cuda-gpu-arch=all negates all preceding --cuda-gpu-arch=X
     //    (only the sm_35 requested afterwards is compiled).
 132 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
 133 // RUN:   --cuda-gpu-arch=sm_20 --cuda-gpu-arch=sm_30 \
 134 // RUN:   --no-cuda-gpu-arch=all \
 135 // RUN:   --cuda-gpu-arch=sm_35 \
 136 // RUN:   -c %s 2>&1 \
 137 // RUN: | FileCheck -check-prefixes NOARCH-SM20,NOARCH-SM30,ARCH-SM35 %s
 138
 139 // g) There's no --cuda-gpu-arch=all; it must be diagnosed as an error.
 140 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
 141 // RUN:   --cuda-gpu-arch=all \
 142 // RUN:   -c %s 2>&1 \
 143 // RUN: | FileCheck -check-prefix ARCHALLERROR %s
 144
 145
 146 // Verify that --[no-]cuda-include-ptx arguments are handled correctly.
 147 // a) By default PTX is included for all GPUs.
 148 // RUN: %clang -### -target x86_64-linux-gnu \
 149 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
 150 // RUN:   -c %s 2>&1 \
 151 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s
 152
 153 // b) --no-cuda-include-ptx=all disables PTX inclusion for all GPUs
 154 // RUN: %clang -### -target x86_64-linux-gnu \
 155 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
 156 // RUN:   --no-cuda-include-ptx=all \
 157 // RUN:   -c %s 2>&1 \
 158 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM35,NOPTX-SM30 %s
 159
 160 // c) --no-cuda-include-ptx=sm_XX disables PTX inclusion for that GPU only
     //    (checked once for sm_35 and once for sm_30).
 161 // RUN: %clang -### -target x86_64-linux-gnu \
 162 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
 163 // RUN:   --no-cuda-include-ptx=sm_35 \
 164 // RUN:   -c %s 2>&1 \
 165 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM35,PTX-SM30 %s
 166 // RUN: %clang -### -target x86_64-linux-gnu \
 167 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
 168 // RUN:   --no-cuda-include-ptx=sm_30 \
 169 // RUN:   -c %s 2>&1 \
 170 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,NOPTX-SM30 %s
 171
 172 // d) --cuda-include-ptx=all overrides a preceding --no-cuda-include-ptx=all
 173 // RUN: %clang -### -target x86_64-linux-gnu \
 174 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
 175 // RUN:   --no-cuda-include-ptx=all --cuda-include-ptx=all \
 176 // RUN:   -c %s 2>&1 \
 177 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s
 178
 179 // e) --cuda-include-ptx=all overrides a preceding --no-cuda-include-ptx=sm_XX
 180 // RUN: %clang -### -target x86_64-linux-gnu \
 181 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
 182 // RUN:   --no-cuda-include-ptx=sm_30 --cuda-include-ptx=all \
 183 // RUN:   -c %s 2>&1 \
 184 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s
 185
 186
     // Match (ARCH-*) or forbid (NOARCH-*) a cc1 job targeting a given GPU
     // architecture, and match the diagnostic for the unsupported value 'all'.
 187 // ARCH-SM20: "-cc1"{{.*}}"-target-cpu" "sm_20"
 188 // NOARCH-SM20-NOT: "-cc1"{{.*}}"-target-cpu" "sm_20"
 189 // ARCH-SM30: "-cc1"{{.*}}"-target-cpu" "sm_30"
 190 // NOARCH-SM30-NOT: "-cc1"{{.*}}"-target-cpu" "sm_30"
 191 // ARCH-SM35: "-cc1"{{.*}}"-target-cpu" "sm_35"
 192 // NOARCH-SM35-NOT: "-cc1"{{.*}}"-target-cpu" "sm_35"
 193 // ARCHALLERROR: error: Unsupported CUDA gpu architecture: all
 194
 195 // Match device-side preprocessor and compiler phases with -save-temps.
     // The first job takes CUDA source (-x cuda); the second one takes the
     // preprocessed output (-x cuda-cpp-output).
 196 // DEVICE-SAVE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 197 // DEVICE-SAVE-SAME: "-aux-triple" "x86_64--linux-gnu"
 198 // DEVICE-SAVE-SAME: "-fcuda-is-device"
 199 // DEVICE-SAVE-SAME: "-x" "cuda"
 200
 201 // DEVICE-SAVE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 202 // DEVICE-SAVE-SAME: "-aux-triple" "x86_64--linux-gnu"
 203 // DEVICE-SAVE-SAME: "-fcuda-is-device"
 204 // DEVICE-SAVE-SAME: "-x" "cuda-cpp-output"
 205
 206 // Match the job that produces PTX assembly. Its output file is captured as
     // PTXFILE for the checks below; with -save-temps its input is IR (-x ir)
     // rather than CUDA source.
 207 // DEVICE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 208 // DEVICE-NOSAVE-SAME: "-aux-triple" "x86_64--linux-gnu"
 209 // DEVICE-SAME: "-fcuda-is-device"
 210 // DEVICE-SM30-SAME: "-target-cpu" "sm_30"
 211 // DEVICE-SAME: "-o" "[[PTXFILE:[^"]*]]"
 212 // DEVICE-NOSAVE-SAME: "-x" "cuda"
 213 // DEVICE-SAVE-SAME: "-x" "ir"
 214
 215 // Match the call to ptxas (which assembles PTX to SASS). It must consume
     // PTXFILE; its output file is captured as CUBINFILE.
 216 // DEVICE:ptxas
 217 // DEVICE-SM30-DAG: "--gpu-name" "sm_30"
 218 // DEVICE-DAG: "--output-file" "[[CUBINFILE:[^"]*]]"
 219 // DEVICE-DAG: "[[PTXFILE]]"
 220
 221 // Match another device-side compilation (used when more than one GPU
     // architecture is requested). Its output is captured as PTXFILE2.
 222 // DEVICE2: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 223 // DEVICE2-SAME: "-aux-triple" "x86_64--linux-gnu"
 224 // DEVICE2-SAME: "-fcuda-is-device"
 225 // DEVICE2-SM35-SAME: "-target-cpu" "sm_35"
 226 // DEVICE2-SAME: "-o" "[[PTXFILE2:[^"]*]]"
 227 // DEVICE2-SAME: "-x" "cuda"
 228
 229 // Match another call to ptxas, which must consume PTXFILE2; its output is
     // captured as CUBINFILE2.
 230 // DEVICE2: ptxas
 231 // DEVICE2-SM35-DAG: "--gpu-name" "sm_35"
 232 // DEVICE2-DAG: "--output-file" "[[CUBINFILE2:[^"]*]]"
 233 // DEVICE2-DAG: "[[PTXFILE2]]"
 234
 235 // Match no device-side compilation.
 236 // NODEVICE-NOT: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 237 // NODEVICE-NOT: "-fcuda-is-device"
 238
     // Match the fatbinary job. Its output is captured as FATBINARY, and it must
     // receive the ptxas outputs and PTX files captured by the device-side
     // checks above as --image arguments.
 239 // INCLUDES-DEVICE:fatbinary
 240 // INCLUDES-DEVICE-DAG: "--create" "[[FATBINARY:[^"]*]]"
 241 // INCLUDES-DEVICE-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE]]"
 242 // INCLUDES-DEVICE-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE]]"
 243 // INCLUDES-DEVICE2-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE2]]"
 244 // INCLUDES-DEVICE2-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE2]]"
 245
 246 // Match host-side preprocessor job with -save-temps.
 247 // HOST-SAVE: "-cc1" "-triple" "x86_64--linux-gnu"
 248 // HOST-SAVE-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
 249 // HOST-SAVE-NOT: "-fcuda-is-device"
 250 // HOST-SAVE-SAME: "-x" "cuda"
 251
 252 // Match host-side compilation. Its output file is captured as HOSTOUTPUT.
 253 // HOST: "-cc1" "-triple" "x86_64--linux-gnu"
 254 // HOST-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
 255 // HOST-NOT: "-fcuda-is-device"
 256 // HOST-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]"
 257 // HOST-NOSAVE-SAME: "-x" "cuda"
 258 // HOST-SAVE-SAME: "-x" "cuda-cpp-output"
 259 // There is only one GPU binary after combining it with fatbinary, so no
     // other GPU binary may be included before the fatbinary output...
 260 // INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary"
 261 // INCLUDES-DEVICE-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]"
 262 // ...and none may be included after it either.
 263 // INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary"
 264
 265 // Match the external assembler job, which must consume the host-side
     // compilation output (HOSTOUTPUT).
 266 // HOST-AS: "-o" "{{.*}}.o" "[[HOSTOUTPUT]]"
 267
 268 // Match no GPU code inclusion.
 269 // NOINCLUDES-DEVICE-NOT: "-fcuda-include-gpubinary"
 270
 271 // Match no host compilation. These patterns are not host-specific; they
     // rely on being applied after the device-side matches above.
 272 // NOHOST-NOT: "-cc1" "-triple"
 273 // NOHOST-NOT: "-x" "cuda"
 274
 275 // Match linker. The tool name may carry an optional ".exe" suffix, and the
     // host-side compilation output (HOSTOUTPUT) must be among its arguments.
 276 // LINK: "{{.*}}{{ld|link}}{{(\.exe)?}}"
 277 // LINK-SAME: "[[HOSTOUTPUT]]"
 278
 279 // Match no linker.
 280 // NOLINK-NOT: "{{.*}}{{ld|link}}{{(\.exe)?}}"
 281
     // Match the fatbinary job of the sm_30 + sm_35 builds. The sm_XX image is
     // always expected; the compute_XX (PTX) image is either matched or
     // forbidden for each architecture.
 282 // FATBIN-COMMON:fatbinary
 283 // FATBIN-COMMON: "--create" "[[FATBINARY:[^"]*]]"
 284 // FATBIN-COMMON: "--image=profile=sm_30,file=
 285 // PTX-SM30: "--image=profile=compute_30,file=
 286 // NOPTX-SM30-NOT: "--image=profile=compute_30,file=
 287 // FATBIN-COMMON: "--image=profile=sm_35,file=
 288 // PTX-SM35: "--image=profile=compute_35,file=
 289 // NOPTX-SM35-NOT: "--image=profile=compute_35,file=