1 //===--- Cuda.cpp - Cuda Tool and ToolChain Implementations -----*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 #include "InputInfo.h"
12 #include "CommonArgs.h"
13 #include "clang/Basic/Cuda.h"
14 #include "clang/Config/config.h"
15 #include "clang/Basic/VirtualFileSystem.h"
16 #include "clang/Driver/Distro.h"
17 #include "clang/Driver/Compilation.h"
18 #include "clang/Driver/Driver.h"
19 #include "clang/Driver/DriverDiagnostic.h"
20 #include "clang/Driver/Options.h"
21 #include "llvm/Option/ArgList.h"
22 #include "llvm/Support/Path.h"
23 #include <system_error>
25 using namespace clang::driver;
26 using namespace clang::driver::toolchains;
27 using namespace clang::driver::tools;
28 using namespace clang;
29 using namespace llvm::opt;
// Parses the contents of version.txt in a CUDA installation. It should
// contain one line of the form e.g. "CUDA Version 7.5.2".
33 static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
34 if (!V.startswith("CUDA Version "))
35 return CudaVersion::UNKNOWN;
36 V = V.substr(strlen("CUDA Version "));
37 int Major = -1, Minor = -1;
38 auto First = V.split('.');
39 auto Second = First.second.split('.');
40 if (First.first.getAsInteger(10, Major) ||
41 Second.first.getAsInteger(10, Minor))
42 return CudaVersion::UNKNOWN;
44 if (Major == 7 && Minor == 0) {
45 // This doesn't appear to ever happen -- version.txt doesn't exist in the
46 // CUDA 7 installs I've seen. But no harm in checking.
47 return CudaVersion::CUDA_70;
49 if (Major == 7 && Minor == 5)
50 return CudaVersion::CUDA_75;
51 if (Major == 8 && Minor == 0)
52 return CudaVersion::CUDA_80;
53 if (Major == 9 && Minor == 0)
54 return CudaVersion::CUDA_90;
55 return CudaVersion::UNKNOWN;
58 CudaInstallationDetector::CudaInstallationDetector(
59 const Driver &D, const llvm::Triple &HostTriple,
60 const llvm::opt::ArgList &Args)
62 SmallVector<std::string, 4> CudaPathCandidates;
64 // In decreasing order so we prefer newer versions to older versions.
65 std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
67 if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
68 CudaPathCandidates.push_back(
69 Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ));
70 } else if (HostTriple.isOSWindows()) {
71 for (const char *Ver : Versions)
72 CudaPathCandidates.push_back(
73 D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
76 CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda");
77 for (const char *Ver : Versions)
78 CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-" + Ver);
80 if (Distro(D.getVFS()).IsDebian())
81 // Special case for Debian to have nvidia-cuda-toolkit work
82 // out of the box. More info on http://bugs.debian.org/882505
83 CudaPathCandidates.push_back(D.SysRoot + "/usr/lib/cuda");
86 for (const auto &CudaPath : CudaPathCandidates) {
87 if (CudaPath.empty() || !D.getVFS().exists(CudaPath))
90 InstallPath = CudaPath;
91 BinPath = CudaPath + "/bin";
92 IncludePath = InstallPath + "/include";
93 LibDevicePath = InstallPath + "/nvvm/libdevice";
95 auto &FS = D.getVFS();
96 if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
99 // On Linux, we have both lib and lib64 directories, and we need to choose
100 // based on our triple. On MacOS, we have only a lib directory.
102 // It's sufficient for our purposes to be flexible: If both lib and lib64
103 // exist, we choose whichever one matches our triple. Otherwise, if only
104 // lib exists, we use it.
105 if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
106 LibPath = InstallPath + "/lib64";
107 else if (FS.exists(InstallPath + "/lib"))
108 LibPath = InstallPath + "/lib";
112 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
113 FS.getBufferForFile(InstallPath + "/version.txt");
115 // CUDA 7.0 doesn't have a version.txt, so guess that's our version if
116 // version.txt isn't present.
117 Version = CudaVersion::CUDA_70;
119 Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
122 if (Version == CudaVersion::CUDA_90) {
123 // CUDA-9 uses single libdevice file for all GPU variants.
124 std::string FilePath = LibDevicePath + "/libdevice.10.bc";
125 if (FS.exists(FilePath)) {
126 for (const char *GpuArch :
127 {"sm_20", "sm_30", "sm_32", "sm_35", "sm_50", "sm_52", "sm_53",
128 "sm_60", "sm_61", "sm_62", "sm_70"})
129 LibDeviceMap[GpuArch] = FilePath;
133 for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
134 !EC && LI != LE; LI = LI.increment(EC)) {
135 StringRef FilePath = LI->path();
136 StringRef FileName = llvm::sys::path::filename(FilePath);
137 // Process all bitcode filenames that look like
138 // libdevice.compute_XX.YY.bc
139 const StringRef LibDeviceName = "libdevice.";
140 if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
142 StringRef GpuArch = FileName.slice(
143 LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
144 LibDeviceMap[GpuArch] = FilePath.str();
145 // Insert map entries for specifc devices with this compute
146 // capability. NVCC's choice of the libdevice library version is
147 // rather peculiar and depends on the CUDA version.
148 if (GpuArch == "compute_20") {
149 LibDeviceMap["sm_20"] = FilePath;
150 LibDeviceMap["sm_21"] = FilePath;
151 LibDeviceMap["sm_32"] = FilePath;
152 } else if (GpuArch == "compute_30") {
153 LibDeviceMap["sm_30"] = FilePath;
154 if (Version < CudaVersion::CUDA_80) {
155 LibDeviceMap["sm_50"] = FilePath;
156 LibDeviceMap["sm_52"] = FilePath;
157 LibDeviceMap["sm_53"] = FilePath;
159 LibDeviceMap["sm_60"] = FilePath;
160 LibDeviceMap["sm_61"] = FilePath;
161 LibDeviceMap["sm_62"] = FilePath;
162 } else if (GpuArch == "compute_35") {
163 LibDeviceMap["sm_35"] = FilePath;
164 LibDeviceMap["sm_37"] = FilePath;
165 } else if (GpuArch == "compute_50") {
166 if (Version >= CudaVersion::CUDA_80) {
167 LibDeviceMap["sm_50"] = FilePath;
168 LibDeviceMap["sm_52"] = FilePath;
169 LibDeviceMap["sm_53"] = FilePath;
175 // Check that we have found at least one libdevice that we can link in if
176 // -nocudalib hasn't been specified.
177 if (LibDeviceMap.empty() && !Args.hasArg(options::OPT_nocudalib))
185 void CudaInstallationDetector::AddCudaIncludeArgs(
186 const ArgList &DriverArgs, ArgStringList &CC1Args) const {
187 if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
188 // Add cuda_wrappers/* to our system include path. This lets us wrap
189 // standard library headers.
190 SmallString<128> P(D.ResourceDir);
191 llvm::sys::path::append(P, "include");
192 llvm::sys::path::append(P, "cuda_wrappers");
193 CC1Args.push_back("-internal-isystem");
194 CC1Args.push_back(DriverArgs.MakeArgString(P));
197 if (DriverArgs.hasArg(options::OPT_nocudainc))
201 D.Diag(diag::err_drv_no_cuda_installation);
205 CC1Args.push_back("-internal-isystem");
206 CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
207 CC1Args.push_back("-include");
208 CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
211 void CudaInstallationDetector::CheckCudaVersionSupportsArch(
212 CudaArch Arch) const {
213 if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
214 ArchsWithBadVersion.count(Arch) > 0)
217 auto MinVersion = MinVersionForCudaArch(Arch);
218 auto MaxVersion = MaxVersionForCudaArch(Arch);
219 if (Version < MinVersion || Version > MaxVersion) {
220 ArchsWithBadVersion.insert(Arch);
221 D.Diag(diag::err_drv_cuda_version_unsupported)
222 << CudaArchToString(Arch) << CudaVersionToString(MinVersion)
223 << CudaVersionToString(MaxVersion) << InstallPath
224 << CudaVersionToString(Version);
228 void CudaInstallationDetector::print(raw_ostream &OS) const {
230 OS << "Found CUDA installation: " << InstallPath << ", version "
231 << CudaVersionToString(Version) << "\n";
234 void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
235 const InputInfo &Output,
236 const InputInfoList &Inputs,
238 const char *LinkingOutput) const {
240 static_cast<const toolchains::CudaToolChain &>(getToolChain());
241 assert(TC.getTriple().isNVPTX() && "Wrong platform");
243 StringRef GPUArchName;
244 // If this is an OpenMP action we need to extract the device architecture
245 // from the -march=arch option. This option may come from -Xopenmp-target
246 // flag or the default value.
247 if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
248 GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
249 assert(!GPUArchName.empty() && "Must have an architecture passed in.");
251 GPUArchName = JA.getOffloadingArch();
253 // Obtain architecture from the action.
254 CudaArch gpu_arch = StringToCudaArch(GPUArchName);
255 assert(gpu_arch != CudaArch::UNKNOWN &&
256 "Device action expected to have an architecture.");
258 // Check that our installation's ptxas supports gpu_arch.
259 if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
260 TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
263 ArgStringList CmdArgs;
264 CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
265 if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
266 options::OPT_no_cuda_noopt_device_debug, false)) {
267 // ptxas does not accept -g option if optimization is enabled, so
268 // we ignore the compiler's -O* options if we want debug info.
269 CmdArgs.push_back("-g");
270 CmdArgs.push_back("--dont-merge-basicblocks");
271 CmdArgs.push_back("--return-at-end");
272 } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
273 // Map the -O we received to -O{0,1,2,3}.
275 // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
276 // default, so it may correspond more closely to the spirit of clang -O2.
278 // -O3 seems like the least-bad option when -Osomething is specified to
279 // clang but it isn't handled below.
280 StringRef OOpt = "3";
281 if (A->getOption().matches(options::OPT_O4) ||
282 A->getOption().matches(options::OPT_Ofast))
284 else if (A->getOption().matches(options::OPT_O0))
286 else if (A->getOption().matches(options::OPT_O)) {
287 // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
288 OOpt = llvm::StringSwitch<const char *>(A->getValue())
296 CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
298 // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
299 // to no optimizations, but ptxas's default is -O3.
300 CmdArgs.push_back("-O0");
303 // Pass -v to ptxas if it was passed to the driver.
304 if (Args.hasArg(options::OPT_v))
305 CmdArgs.push_back("-v");
307 CmdArgs.push_back("--gpu-name");
308 CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
309 CmdArgs.push_back("--output-file");
310 CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
311 for (const auto& II : Inputs)
312 CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
314 for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
315 CmdArgs.push_back(Args.MakeArgString(A));
317 // In OpenMP we need to generate relocatable code.
318 if (JA.isOffloading(Action::OFK_OpenMP) &&
319 Args.hasFlag(options::OPT_fopenmp_relocatable_target,
320 options::OPT_fnoopenmp_relocatable_target,
322 CmdArgs.push_back("-c");
325 if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
326 Exec = A->getValue();
328 Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
329 C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
// All inputs to this linker must be from CudaDeviceActions, as we need to look
// at the Inputs' Actions in order to figure out which GPU architecture they
// correspond to.
335 void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
336 const InputInfo &Output,
337 const InputInfoList &Inputs,
339 const char *LinkingOutput) const {
341 static_cast<const toolchains::CudaToolChain &>(getToolChain());
342 assert(TC.getTriple().isNVPTX() && "Wrong platform");
344 ArgStringList CmdArgs;
345 CmdArgs.push_back("--cuda");
346 CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
347 CmdArgs.push_back(Args.MakeArgString("--create"));
348 CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
350 for (const auto& II : Inputs) {
351 auto *A = II.getAction();
352 assert(A->getInputs().size() == 1 &&
353 "Device offload action is expected to have a single input");
354 const char *gpu_arch_str = A->getOffloadingArch();
355 assert(gpu_arch_str &&
356 "Device action expected to have associated a GPU architecture!");
357 CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
359 // We need to pass an Arch of the form "sm_XX" for cubin files and
360 // "compute_XX" for ptx.
362 (II.getType() == types::TY_PP_Asm)
363 ? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
365 CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
366 Arch + ",file=" + II.getFilename()));
369 for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
370 CmdArgs.push_back(Args.MakeArgString(A));
372 const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
373 C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
376 void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
377 const InputInfo &Output,
378 const InputInfoList &Inputs,
380 const char *LinkingOutput) const {
382 static_cast<const toolchains::CudaToolChain &>(getToolChain());
383 assert(TC.getTriple().isNVPTX() && "Wrong platform");
385 ArgStringList CmdArgs;
387 // OpenMP uses nvlink to link cubin files. The result will be embedded in the
388 // host binary by the host linker.
389 assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
390 "CUDA toolchain not expected for an OpenMP host device.");
392 if (Output.isFilename()) {
393 CmdArgs.push_back("-o");
394 CmdArgs.push_back(Output.getFilename());
396 assert(Output.isNothing() && "Invalid output.");
397 if (Args.hasArg(options::OPT_g_Flag))
398 CmdArgs.push_back("-g");
400 if (Args.hasArg(options::OPT_v))
401 CmdArgs.push_back("-v");
404 Args.getLastArgValue(options::OPT_march_EQ);
405 assert(!GPUArch.empty() && "At least one GPU Arch required for ptxas.");
407 CmdArgs.push_back("-arch");
408 CmdArgs.push_back(Args.MakeArgString(GPUArch));
410 // Add paths specified in LIBRARY_PATH environment variable as -L options.
411 addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
413 // Add paths for the default clang library path.
414 SmallString<256> DefaultLibPath =
415 llvm::sys::path::parent_path(TC.getDriver().Dir);
416 llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX);
417 CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
419 // Add linking against library implementing OpenMP calls on NVPTX target.
420 CmdArgs.push_back("-lomptarget-nvptx");
422 for (const auto &II : Inputs) {
423 if (II.getType() == types::TY_LLVM_IR ||
424 II.getType() == types::TY_LTO_IR ||
425 II.getType() == types::TY_LTO_BC ||
426 II.getType() == types::TY_LLVM_BC) {
427 C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
428 << getToolChain().getTripleString();
432 // Currently, we only pass the input files to the linker, we do not pass
433 // any libraries that may be valid only for the host.
434 if (!II.isFilename())
437 const char *CubinF = C.addTempFile(
438 C.getArgs().MakeArgString(getToolChain().getInputFilename(II)));
440 CmdArgs.push_back(CubinF);
443 AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
446 Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
447 C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
450 /// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
451 /// which isn't properly a linker but nonetheless performs the step of stitching
452 /// together object files from the assembler into a single blob.
454 CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
455 const ToolChain &HostTC, const ArgList &Args,
456 const Action::OffloadKind OK)
457 : ToolChain(D, Triple, Args), HostTC(HostTC),
458 CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
459 if (CudaInstallation.isValid())
460 getProgramPaths().push_back(CudaInstallation.getBinPath());
461 // Lookup binaries into the driver directory, this is used to
462 // discover the clang-offload-bundler executable.
463 getProgramPaths().push_back(getDriver().Dir);
466 std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
467 // Only object files are changed, for example assembly files keep their .s
468 // extensions. CUDA also continues to use .o as they don't use nvlink but
470 if (!(OK == Action::OFK_OpenMP && Input.getType() == types::TY_Object))
471 return ToolChain::getInputFilename(Input);
473 // Replace extension for object files with cubin because nvlink relies on
474 // these particular file names.
475 SmallString<256> Filename(ToolChain::getInputFilename(Input));
476 llvm::sys::path::replace_extension(Filename, "cubin");
477 return Filename.str();
480 void CudaToolChain::addClangTargetOptions(
481 const llvm::opt::ArgList &DriverArgs,
482 llvm::opt::ArgStringList &CC1Args,
483 Action::OffloadKind DeviceOffloadingKind) const {
484 HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
486 StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
487 assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
488 assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
489 DeviceOffloadingKind == Action::OFK_Cuda) &&
490 "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
492 if (DeviceOffloadingKind == Action::OFK_Cuda) {
493 CC1Args.push_back("-fcuda-is-device");
495 if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
496 options::OPT_fno_cuda_flush_denormals_to_zero, false))
497 CC1Args.push_back("-fcuda-flush-denormals-to-zero");
499 if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
500 options::OPT_fno_cuda_approx_transcendentals, false))
501 CC1Args.push_back("-fcuda-approx-transcendentals");
504 if (DriverArgs.hasArg(options::OPT_nocudalib))
507 std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
509 if (LibDeviceFile.empty()) {
510 if (DeviceOffloadingKind == Action::OFK_OpenMP &&
511 DriverArgs.hasArg(options::OPT_S))
514 getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
518 CC1Args.push_back("-mlink-cuda-bitcode");
519 CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
521 if (CudaInstallation.version() >= CudaVersion::CUDA_90) {
522 // CUDA-9 uses new instructions that are only available in PTX6.0
523 CC1Args.push_back("-target-feature");
524 CC1Args.push_back("+ptx60");
526 // Libdevice in CUDA-7.0 requires PTX version that's more recent
527 // than LLVM defaults to. Use PTX4.2 which is the PTX version that
528 // came with CUDA-7.0.
529 CC1Args.push_back("-target-feature");
530 CC1Args.push_back("+ptx42");
534 void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
535 ArgStringList &CC1Args) const {
536 // Check our CUDA version if we're going to include the CUDA headers.
537 if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
538 !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
539 StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
540 assert(!Arch.empty() && "Must have an explicit GPU arch.");
541 CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
543 CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
546 llvm::opt::DerivedArgList *
547 CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
549 Action::OffloadKind DeviceOffloadKind) const {
550 DerivedArgList *DAL =
551 HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
553 DAL = new DerivedArgList(Args.getBaseArgs());
555 const OptTable &Opts = getDriver().getOpts();
557 // For OpenMP device offloading, append derived arguments. Make sure
558 // flags are not duplicated.
559 // Also append the compute capability.
560 if (DeviceOffloadKind == Action::OFK_OpenMP) {
561 for (Arg *A : Args) {
562 bool IsDuplicate = false;
563 for (Arg *DALArg : *DAL) {
573 StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
575 DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
576 CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
581 for (Arg *A : Args) {
582 if (A->getOption().matches(options::OPT_Xarch__)) {
583 // Skip this argument unless the architecture matches BoundArch
584 if (BoundArch.empty() || A->getValue(0) != BoundArch)
587 unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
588 unsigned Prev = Index;
589 std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
591 // If the argument parsing failed or more than one argument was
592 // consumed, the -Xarch_ argument's parameter tried to consume
593 // extra arguments. Emit an error and ignore.
595 // We also want to disallow any options which would alter the
596 // driver behavior; that isn't going to work in our model. We
597 // use isDriverOption() as an approximation, although things
598 // like -O4 are going to slip through.
599 if (!XarchArg || Index > Prev + 1) {
600 getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
601 << A->getAsString(Args);
603 } else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
604 getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
605 << A->getAsString(Args);
608 XarchArg->setBaseArg(A);
609 A = XarchArg.release();
610 DAL->AddSynthesizedArg(A);
615 if (!BoundArch.empty()) {
616 DAL->eraseArg(options::OPT_march_EQ);
617 DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
622 Tool *CudaToolChain::buildAssembler() const {
623 return new tools::NVPTX::Assembler(*this);
626 Tool *CudaToolChain::buildLinker() const {
627 if (OK == Action::OFK_OpenMP)
628 return new tools::NVPTX::OpenMPLinker(*this);
629 return new tools::NVPTX::Linker(*this);
632 void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
633 HostTC.addClangWarningOptions(CC1Args);
636 ToolChain::CXXStdlibType
637 CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
638 return HostTC.GetCXXStdlibType(Args);
641 void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
642 ArgStringList &CC1Args) const {
643 HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
646 void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
647 ArgStringList &CC1Args) const {
648 HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
651 void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
652 ArgStringList &CC1Args) const {
653 HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
656 SanitizerMask CudaToolChain::getSupportedSanitizers() const {
657 // The CudaToolChain only supports sanitizers in the sense that it allows
658 // sanitizer arguments on the command line if they are supported by the host
659 // toolchain. The CudaToolChain will actually ignore any command line
660 // arguments for any of these "supported" sanitizers. That means that no
661 // sanitization of device code is actually supported at this time.
663 // This behavior is necessary because the host and device toolchains
664 // invocations often share the command line, so the device toolchain must
665 // tolerate flags meant only for the host toolchain.
666 return HostTC.getSupportedSanitizers();
669 VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
670 const ArgList &Args) const {
671 return HostTC.computeMSVCVersion(D, Args);