1 //===--- HIP.cpp - HIP Tool and ToolChain Implementations -------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 #include "CommonArgs.h"
11 #include "InputInfo.h"
12 #include "clang/Basic/Cuda.h"
13 #include "clang/Driver/Compilation.h"
14 #include "clang/Driver/Driver.h"
15 #include "clang/Driver/DriverDiagnostic.h"
16 #include "clang/Driver/Options.h"
17 #include "llvm/Support/FileSystem.h"
18 #include "llvm/Support/Path.h"
20 using namespace clang::driver;
21 using namespace clang::driver::toolchains;
22 using namespace clang::driver::tools;
23 using namespace clang;
24 using namespace llvm::opt;
// Path of the platform's null device. It is spliced into the "-inputs="
// argument of clang-offload-bundler as the placeholder host input.
#if defined(_WIN32) || defined(_WIN64)
#define NULL_FILE "nul"
#else
#define NULL_FILE "/dev/null"
#endif
34 static void addBCLib(const Driver &D, const ArgList &Args,
35 ArgStringList &CmdArgs, ArgStringList LibraryPaths,
38 for (std::string LibraryPath : LibraryPaths) {
39 SmallString<128> Path(LibraryPath);
40 llvm::sys::path::append(Path, BCName);
42 if (llvm::sys::fs::exists(FullName)) {
43 CmdArgs.push_back("-mlink-builtin-bitcode");
44 CmdArgs.push_back(Args.MakeArgString(FullName));
48 D.Diag(diag::err_drv_no_such_file) << BCName;
51 static const char *getOutputFileName(Compilation &C, StringRef Base,
53 const char *Extension) {
54 const char *OutputFileName;
55 if (C.getDriver().isSaveTempsEnabled()) {
57 C.getArgs().MakeArgString(Base.str() + Postfix + "." + Extension);
60 C.getDriver().GetTemporaryPath(Base.str() + Postfix, Extension);
61 OutputFileName = C.addTempFile(C.getArgs().MakeArgString(TmpName));
63 return OutputFileName;
66 static void addOptLevelArgs(const llvm::opt::ArgList &Args,
67 llvm::opt::ArgStringList &CmdArgs,
69 if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
71 if (A->getOption().matches(options::OPT_O4) ||
72 A->getOption().matches(options::OPT_Ofast))
74 else if (A->getOption().matches(options::OPT_O0))
76 else if (A->getOption().matches(options::OPT_O)) {
77 // Clang and opt support -Os/-Oz; llc only supports -O0, -O1, -O2 and -O3
78 // so we map -Os/-Oz to -O2.
79 // Only clang supports -Og, and maps it to -O1.
80 // We map anything else to -O2.
81 OOpt = llvm::StringSwitch<const char *>(A->getValue())
85 .Case("s", IsLlc ? "2" : "s")
86 .Case("z", IsLlc ? "2" : "z")
90 CmdArgs.push_back(Args.MakeArgString("-O" + OOpt));
95 const char *AMDGCN::Linker::constructLLVMLinkCommand(
96 Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
97 const ArgList &Args, StringRef SubArchName,
98 StringRef OutputFilePrefix) const {
99 ArgStringList CmdArgs;
100 // Add the input bc's created by compile step.
101 for (const auto &II : Inputs)
102 CmdArgs.push_back(II.getFilename());
104 // Add an intermediate output file.
105 CmdArgs.push_back("-o");
106 auto OutputFileName = getOutputFileName(C, OutputFilePrefix, "-linked", "bc");
107 CmdArgs.push_back(OutputFileName);
109 Args.MakeArgString(getToolChain().GetProgramPath("llvm-link"));
110 C.addCommand(std::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
111 return OutputFileName;
114 const char *AMDGCN::Linker::constructOptCommand(
115 Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
116 const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
117 llvm::StringRef OutputFilePrefix, const char *InputFileName) const {
118 // Construct opt command.
119 ArgStringList OptArgs;
120 // The input to opt is the output from llvm-link.
121 OptArgs.push_back(InputFileName);
122 // Pass optimization arg to opt.
123 addOptLevelArgs(Args, OptArgs);
124 OptArgs.push_back("-mtriple=amdgcn-amd-amdhsa");
125 OptArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName));
127 for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
128 OptArgs.push_back(A->getValue(0));
131 OptArgs.push_back("-o");
132 auto OutputFileName =
133 getOutputFileName(C, OutputFilePrefix, "-optimized", "bc");
134 OptArgs.push_back(OutputFileName);
135 const char *OptExec =
136 Args.MakeArgString(getToolChain().GetProgramPath("opt"));
137 C.addCommand(std::make_unique<Command>(JA, *this, OptExec, OptArgs, Inputs));
138 return OutputFileName;
141 const char *AMDGCN::Linker::constructLlcCommand(
142 Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
143 const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
144 llvm::StringRef OutputFilePrefix, const char *InputFileName,
145 bool OutputIsAsm) const {
146 // Construct llc command.
147 ArgStringList LlcArgs;
148 // The input to llc is the output from opt.
149 LlcArgs.push_back(InputFileName);
150 // Pass optimization arg to llc.
151 addOptLevelArgs(Args, LlcArgs, /*IsLlc=*/true);
152 LlcArgs.push_back("-mtriple=amdgcn-amd-amdhsa");
153 LlcArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName));
155 Args.MakeArgString(Twine("-filetype=") + (OutputIsAsm ? "asm" : "obj")));
157 // Extract all the -m options
158 std::vector<llvm::StringRef> Features;
159 handleTargetFeaturesGroup(
160 Args, Features, options::OPT_m_amdgpu_Features_Group);
162 // Add features to mattr such as xnack
163 std::string MAttrString = "-mattr=";
164 for(auto OneFeature : Features) {
165 MAttrString.append(Args.MakeArgString(OneFeature));
166 if (OneFeature != Features.back())
167 MAttrString.append(",");
169 if(!Features.empty())
170 LlcArgs.push_back(Args.MakeArgString(MAttrString));
172 for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
173 LlcArgs.push_back(A->getValue(0));
176 // Add output filename
177 LlcArgs.push_back("-o");
179 getOutputFileName(C, OutputFilePrefix, "", OutputIsAsm ? "s" : "o");
180 LlcArgs.push_back(LlcOutputFile);
181 const char *Llc = Args.MakeArgString(getToolChain().GetProgramPath("llc"));
182 C.addCommand(std::make_unique<Command>(JA, *this, Llc, LlcArgs, Inputs));
183 return LlcOutputFile;
186 void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
187 const InputInfoList &Inputs,
188 const InputInfo &Output,
189 const llvm::opt::ArgList &Args,
190 const char *InputFileName) const {
191 // Construct lld command.
192 // The output from ld.lld is an HSA code object file.
193 ArgStringList LldArgs{
194 "-flavor", "gnu", "-shared", "-o", Output.getFilename(), InputFileName};
195 const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld"));
196 C.addCommand(std::make_unique<Command>(JA, *this, Lld, LldArgs, Inputs));
199 // Construct a clang-offload-bundler command to bundle code objects for
200 // different GPU's into a HIP fat binary.
201 void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
202 StringRef OutputFileName, const InputInfoList &Inputs,
203 const llvm::opt::ArgList &Args, const Tool& T) {
204 // Construct clang-offload-bundler command to bundle object files for
205 // for different GPU archs.
206 ArgStringList BundlerArgs;
207 BundlerArgs.push_back(Args.MakeArgString("-type=o"));
209 // ToDo: Remove the dummy host binary entry which is required by
210 // clang-offload-bundler.
211 std::string BundlerTargetArg = "-targets=host-x86_64-unknown-linux";
212 std::string BundlerInputArg = "-inputs=" NULL_FILE;
214 for (const auto &II : Inputs) {
215 const auto* A = II.getAction();
216 BundlerTargetArg = BundlerTargetArg + ",hip-amdgcn-amd-amdhsa-" +
217 StringRef(A->getOffloadingArch()).str();
218 BundlerInputArg = BundlerInputArg + "," + II.getFilename();
220 BundlerArgs.push_back(Args.MakeArgString(BundlerTargetArg));
221 BundlerArgs.push_back(Args.MakeArgString(BundlerInputArg));
223 auto BundlerOutputArg =
224 Args.MakeArgString(std::string("-outputs=").append(OutputFileName));
225 BundlerArgs.push_back(BundlerOutputArg);
227 const char *Bundler = Args.MakeArgString(
228 T.getToolChain().GetProgramPath("clang-offload-bundler"));
229 C.addCommand(std::make_unique<Command>(JA, T, Bundler, BundlerArgs, Inputs));
232 // For amdgcn the inputs of the linker job are device bitcode and output is
233 // object file. It calls llvm-link, opt, llc, then lld steps.
234 void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
235 const InputInfo &Output,
236 const InputInfoList &Inputs,
238 const char *LinkingOutput) const {
240 if (JA.getType() == types::TY_HIP_FATBIN)
241 return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this);
243 assert(getToolChain().getTriple().getArch() == llvm::Triple::amdgcn &&
244 "Unsupported target");
246 std::string SubArchName = JA.getOffloadingArch();
247 assert(StringRef(SubArchName).startswith("gfx") && "Unsupported sub arch");
249 // Prefix for temporary file name.
250 std::string Prefix = llvm::sys::path::stem(Inputs[0].getFilename()).str();
251 if (!C.getDriver().isSaveTempsEnabled())
252 Prefix += "-" + SubArchName;
254 // Each command outputs different files.
255 const char *LLVMLinkCommand =
256 constructLLVMLinkCommand(C, JA, Inputs, Args, SubArchName, Prefix);
257 const char *OptCommand = constructOptCommand(C, JA, Inputs, Args, SubArchName,
258 Prefix, LLVMLinkCommand);
259 if (C.getDriver().isSaveTempsEnabled())
260 constructLlcCommand(C, JA, Inputs, Args, SubArchName, Prefix, OptCommand,
261 /*OutputIsAsm=*/true);
262 const char *LlcCommand =
263 constructLlcCommand(C, JA, Inputs, Args, SubArchName, Prefix, OptCommand);
264 constructLldCommand(C, JA, Inputs, Output, Args, LlcCommand);
267 HIPToolChain::HIPToolChain(const Driver &D, const llvm::Triple &Triple,
268 const ToolChain &HostTC, const ArgList &Args)
269 : ToolChain(D, Triple, Args), HostTC(HostTC) {
270 // Lookup binaries into the driver directory, this is used to
271 // discover the clang-offload-bundler executable.
272 getProgramPaths().push_back(getDriver().Dir);
275 void HIPToolChain::addClangTargetOptions(
276 const llvm::opt::ArgList &DriverArgs,
277 llvm::opt::ArgStringList &CC1Args,
278 Action::OffloadKind DeviceOffloadingKind) const {
279 HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
281 StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
282 assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
284 assert(DeviceOffloadingKind == Action::OFK_HIP &&
285 "Only HIP offloading kinds are supported for GPUs.");
287 CC1Args.push_back("-target-cpu");
288 CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch));
289 CC1Args.push_back("-fcuda-is-device");
291 if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
292 options::OPT_fno_cuda_flush_denormals_to_zero, false))
293 CC1Args.push_back("-fcuda-flush-denormals-to-zero");
295 if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
296 options::OPT_fno_cuda_approx_transcendentals, false))
297 CC1Args.push_back("-fcuda-approx-transcendentals");
299 if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
301 CC1Args.push_back("-fgpu-rdc");
303 StringRef MaxThreadsPerBlock =
304 DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ);
305 if (!MaxThreadsPerBlock.empty()) {
307 std::string("--gpu-max-threads-per-block=") + MaxThreadsPerBlock.str();
308 CC1Args.push_back(DriverArgs.MakeArgStringRef(ArgStr));
311 if (DriverArgs.hasFlag(options::OPT_fgpu_allow_device_init,
312 options::OPT_fno_gpu_allow_device_init, false))
313 CC1Args.push_back("-fgpu-allow-device-init");
315 CC1Args.push_back("-fcuda-allow-variadic-functions");
317 // Default to "hidden" visibility, as object level linking will not be
318 // supported for the foreseeable future.
319 if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
320 options::OPT_fvisibility_ms_compat)) {
321 CC1Args.append({"-fvisibility", "hidden"});
322 CC1Args.push_back("-fapply-global-visibility-to-externs");
325 if (DriverArgs.hasArg(options::OPT_nogpulib))
327 ArgStringList LibraryPaths;
329 // Find in --hip-device-lib-path and HIP_LIBRARY_PATH.
331 DriverArgs.getAllArgValues(options::OPT_hip_device_lib_path_EQ))
332 LibraryPaths.push_back(DriverArgs.MakeArgString(Path));
334 addDirectoryList(DriverArgs, LibraryPaths, "-L", "HIP_DEVICE_LIB_PATH");
336 llvm::SmallVector<std::string, 10> BCLibs;
338 // Add bitcode library in --hip-device-lib.
339 for (auto Lib : DriverArgs.getAllArgValues(options::OPT_hip_device_lib_EQ)) {
340 BCLibs.push_back(DriverArgs.MakeArgString(Lib));
343 // If --hip-device-lib is not set, add the default bitcode libraries.
344 if (BCLibs.empty()) {
345 // Get the bc lib file name for ISA version. For example,
346 // gfx803 => oclc_isa_version_803.amdgcn.bc.
347 std::string GFXVersion = GpuArch.drop_front(3).str();
348 std::string ISAVerBC = "oclc_isa_version_" + GFXVersion + ".amdgcn.bc";
350 llvm::StringRef FlushDenormalControlBC;
351 if (DriverArgs.hasArg(options::OPT_fcuda_flush_denormals_to_zero))
352 FlushDenormalControlBC = "oclc_daz_opt_on.amdgcn.bc";
354 FlushDenormalControlBC = "oclc_daz_opt_off.amdgcn.bc";
356 llvm::StringRef WaveFrontSizeBC;
357 if (stoi(GFXVersion) < 1000)
358 WaveFrontSizeBC = "oclc_wavefrontsize64_on.amdgcn.bc";
360 WaveFrontSizeBC = "oclc_wavefrontsize64_off.amdgcn.bc";
362 BCLibs.append({"hip.amdgcn.bc", "ocml.amdgcn.bc", "ockl.amdgcn.bc",
363 "oclc_finite_only_off.amdgcn.bc", FlushDenormalControlBC,
364 "oclc_correctly_rounded_sqrt_on.amdgcn.bc",
365 "oclc_unsafe_math_off.amdgcn.bc", ISAVerBC,
368 for (auto Lib : BCLibs)
369 addBCLib(getDriver(), DriverArgs, CC1Args, LibraryPaths, Lib);
372 llvm::opt::DerivedArgList *
373 HIPToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
375 Action::OffloadKind DeviceOffloadKind) const {
376 DerivedArgList *DAL =
377 HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
379 DAL = new DerivedArgList(Args.getBaseArgs());
381 const OptTable &Opts = getDriver().getOpts();
383 for (Arg *A : Args) {
384 if (A->getOption().matches(options::OPT_Xarch__)) {
385 // Skip this argument unless the architecture matches BoundArch.
386 if (BoundArch.empty() || A->getValue(0) != BoundArch)
389 unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
390 unsigned Prev = Index;
391 std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
393 // If the argument parsing failed or more than one argument was
394 // consumed, the -Xarch_ argument's parameter tried to consume
395 // extra arguments. Emit an error and ignore.
397 // We also want to disallow any options which would alter the
398 // driver behavior; that isn't going to work in our model. We
399 // use isDriverOption() as an approximation, although things
400 // like -O4 are going to slip through.
401 if (!XarchArg || Index > Prev + 1) {
402 getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
403 << A->getAsString(Args);
405 } else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
406 getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
407 << A->getAsString(Args);
410 XarchArg->setBaseArg(A);
411 A = XarchArg.release();
412 DAL->AddSynthesizedArg(A);
417 if (!BoundArch.empty()) {
418 DAL->eraseArg(options::OPT_march_EQ);
419 DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
425 Tool *HIPToolChain::buildLinker() const {
426 assert(getTriple().getArch() == llvm::Triple::amdgcn);
427 return new tools::AMDGCN::Linker(*this);
430 void HIPToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
431 HostTC.addClangWarningOptions(CC1Args);
434 ToolChain::CXXStdlibType
435 HIPToolChain::GetCXXStdlibType(const ArgList &Args) const {
436 return HostTC.GetCXXStdlibType(Args);
439 void HIPToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
440 ArgStringList &CC1Args) const {
441 HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
444 void HIPToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
445 ArgStringList &CC1Args) const {
446 HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
449 void HIPToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
450 ArgStringList &CC1Args) const {
451 HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
454 SanitizerMask HIPToolChain::getSupportedSanitizers() const {
455 // The HIPToolChain only supports sanitizers in the sense that it allows
456 // sanitizer arguments on the command line if they are supported by the host
457 // toolchain. The HIPToolChain will actually ignore any command line
458 // arguments for any of these "supported" sanitizers. That means that no
459 // sanitization of device code is actually supported at this time.
461 // This behavior is necessary because the host and device toolchains
462 // invocations often share the command line, so the device toolchain must
463 // tolerate flags meant only for the host toolchain.
464 return HostTC.getSupportedSanitizers();
467 VersionTuple HIPToolChain::computeMSVCVersion(const Driver *D,
468 const ArgList &Args) const {
469 return HostTC.computeMSVCVersion(D, Args);