1 //===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 /// \brief This file implements semantic analysis for CUDA constructs.
12 //===----------------------------------------------------------------------===//
14 #include "clang/Sema/Sema.h"
15 #include "clang/AST/ASTContext.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/Lex/Preprocessor.h"
18 #include "clang/Sema/SemaDiagnostic.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/SmallVector.h"
21 using namespace clang;
23 ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
24 MultiExprArg ExecConfig,
25 SourceLocation GGGLoc) {
26 FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
28 return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
29 << "cudaConfigureCall");
30 QualType ConfigQTy = ConfigDecl->getType();
32 DeclRefExpr *ConfigDR = new (Context)
33 DeclRefExpr(ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
34 MarkFunctionReferenced(LLLLoc, ConfigDecl);
36 return ActOnCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
37 /*IsExecConfig=*/true);
40 /// IdentifyCUDATarget - Determine the CUDA compilation target for this function
41 Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
42 if (D->hasAttr<CUDAInvalidTargetAttr>())
43 return CFT_InvalidTarget;
45 if (D->hasAttr<CUDAGlobalAttr>())
48 if (D->hasAttr<CUDADeviceAttr>()) {
49 if (D->hasAttr<CUDAHostAttr>())
50 return CFT_HostDevice;
52 } else if (D->hasAttr<CUDAHostAttr>()) {
54 } else if (D->isImplicit()) {
55 // Some implicit declarations (like intrinsic functions) are not marked.
56 // Set the most lenient target on them for maximal flexibility.
57 return CFT_HostDevice;
63 // * CUDA Call preference table
67 // Ph - preference in host mode
68 // Pd - preference in device mode
70 // Preferences: b-best, f-fallback, l-last resort, n-never.
72 // | F | T | Ph | Pd | H |
73 // |----+----+----+----+-----+
74 // | d | d | b | b | (b) |
75 // | d | g | n | n | (a) |
76 // | d | h | l | l | (e) |
77 // | d | hd | f | f | (c) |
78 // | g | d | b | b | (b) |
79 // | g | g | n | n | (a) |
80 // | g | h | l | l | (e) |
81 // | g | hd | f | f | (c) |
82 // | h | d | l | l | (e) |
83 // | h | g | b | b | (b) |
84 // | h | h | b | b | (b) |
85 // | h | hd | f | f | (c) |
86 // | hd | d | l | f | (d) |
87 // | hd | g | f | n |(d/a)|
88 // | hd | h | f | l | (d) |
89 // | hd | hd | b | b | (b) |
91 Sema::CUDAFunctionPreference
92 Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
93 const FunctionDecl *Callee) {
94 assert(getLangOpts().CUDATargetOverloads &&
95 "Should not be called w/o enabled target overloads.");
97 assert(Callee && "Callee must be valid.");
98 CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
99 CUDAFunctionTarget CallerTarget =
100 (Caller != nullptr) ? IdentifyCUDATarget(Caller) : Sema::CFT_Host;
102 // If one of the targets is invalid, the check always fails, no matter what
103 // the other target is.
104 if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
107 // (a) Can't call global from some contexts until we support CUDA's
108 // dynamic parallelism.
109 if (CalleeTarget == CFT_Global &&
110 (CallerTarget == CFT_Global || CallerTarget == CFT_Device ||
111 (CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice)))
114 // (b) Best case scenarios
115 if (CalleeTarget == CallerTarget ||
116 (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
117 (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
120 // (c) Calling HostDevice is OK as a fallback that works for everyone.
121 if (CalleeTarget == CFT_HostDevice)
124 // Figure out what should be returned 'last resort' cases. Normally
125 // those would not be allowed, but we'll consider them if
126 // CUDADisableTargetCallChecks is true.
127 CUDAFunctionPreference QuestionableResult =
128 getLangOpts().CUDADisableTargetCallChecks ? CFP_LastResort : CFP_Never;
130 // (d) HostDevice behavior depends on compilation mode.
131 if (CallerTarget == CFT_HostDevice) {
132 // Calling a function that matches compilation mode is OK.
133 // Calling a function from the other side is frowned upon.
134 if (getLangOpts().CUDAIsDevice)
135 return CalleeTarget == CFT_Device ? CFP_Fallback : QuestionableResult;
137 return (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)
139 : QuestionableResult;
142 // (e) Calling across device/host boundary is not something you should do.
143 if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
144 (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
145 (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
146 return QuestionableResult;
148 llvm_unreachable("All cases should've been handled by now.");
151 bool Sema::CheckCUDATarget(const FunctionDecl *Caller,
152 const FunctionDecl *Callee) {
153 // With target overloads enabled, we only disallow calling
154 // combinations with CFP_Never.
155 if (getLangOpts().CUDATargetOverloads)
156 return IdentifyCUDAPreference(Caller,Callee) == CFP_Never;
158 // The CUDADisableTargetCallChecks short-circuits this check: we assume all
159 // cross-target calls are valid.
160 if (getLangOpts().CUDADisableTargetCallChecks)
163 CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller),
164 CalleeTarget = IdentifyCUDATarget(Callee);
166 // If one of the targets is invalid, the check always fails, no matter what
167 // the other target is.
168 if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
171 // CUDA B.1.1 "The __device__ qualifier declares a function that is [...]
172 // Callable from the device only."
173 if (CallerTarget == CFT_Host && CalleeTarget == CFT_Device)
176 // CUDA B.1.2 "The __global__ qualifier declares a function that is [...]
177 // Callable from the host only."
178 // CUDA B.1.3 "The __host__ qualifier declares a function that is [...]
179 // Callable from the host only."
180 if ((CallerTarget == CFT_Device || CallerTarget == CFT_Global) &&
181 (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global))
184 // CUDA B.1.3 "The __device__ and __host__ qualifiers can be used together
185 // however, in which case the function is compiled for both the host and the
186 // device. The __CUDA_ARCH__ macro [...] can be used to differentiate code
187 // paths between host and device."
188 if (CallerTarget == CFT_HostDevice && CalleeTarget != CFT_HostDevice) {
189 // If the caller is implicit then the check always passes.
190 if (Caller->isImplicit()) return false;
192 bool InDeviceMode = getLangOpts().CUDAIsDevice;
193 if (!InDeviceMode && CalleeTarget != CFT_Host)
195 if (InDeviceMode && CalleeTarget != CFT_Device) {
196 // Allow host device functions to call host functions if explicitly
198 if (CalleeTarget == CFT_Host &&
199 getLangOpts().CUDAAllowHostCallsFromHostDevice) {
200 Diag(Caller->getLocation(),
201 diag::warn_host_calls_from_host_device)
202 << Callee->getNameAsString() << Caller->getNameAsString();
213 template <typename T, typename FetchDeclFn>
214 static void EraseUnwantedCUDAMatchesImpl(Sema &S, const FunctionDecl *Caller,
215 llvm::SmallVectorImpl<T> &Matches,
216 FetchDeclFn FetchDecl) {
217 assert(S.getLangOpts().CUDATargetOverloads &&
218 "Should not be called w/o enabled target overloads.");
219 if (Matches.size() <= 1)
222 // Find the best call preference among the functions in Matches.
223 Sema::CUDAFunctionPreference P, BestCFP = Sema::CFP_Never;
224 for (auto const &Match : Matches) {
225 P = S.IdentifyCUDAPreference(Caller, FetchDecl(Match));
230 // Erase all functions with lower priority.
231 for (unsigned I = 0, N = Matches.size(); I != N;)
232 if (S.IdentifyCUDAPreference(Caller, FetchDecl(Matches[I])) < BestCFP) {
233 Matches[I] = Matches[--N];
240 void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
241 SmallVectorImpl<FunctionDecl *> &Matches){
242 EraseUnwantedCUDAMatchesImpl<FunctionDecl *>(
243 *this, Caller, Matches, [](const FunctionDecl *item) { return item; });
246 void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
247 SmallVectorImpl<DeclAccessPair> &Matches) {
248 EraseUnwantedCUDAMatchesImpl<DeclAccessPair>(
249 *this, Caller, Matches, [](const DeclAccessPair &item) {
250 return dyn_cast<FunctionDecl>(item.getDecl());
254 void Sema::EraseUnwantedCUDAMatches(
255 const FunctionDecl *Caller,
256 SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches){
257 EraseUnwantedCUDAMatchesImpl<std::pair<DeclAccessPair, FunctionDecl *>>(
258 *this, Caller, Matches,
259 [](const std::pair<DeclAccessPair, FunctionDecl *> &item) {
260 return dyn_cast<FunctionDecl>(item.second);
264 /// When an implicitly-declared special member has to invoke more than one
265 /// base/field special member, conflicts may occur in the targets of these
266 /// members. For example, if one base's member __host__ and another's is
267 /// __device__, it's a conflict.
268 /// This function figures out if the given targets \param Target1 and
269 /// \param Target2 conflict, and if they do not it fills in
270 /// \param ResolvedTarget with a target that resolves for both calls.
271 /// \return true if there's a conflict, false otherwise.
273 resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
274 Sema::CUDAFunctionTarget Target2,
275 Sema::CUDAFunctionTarget *ResolvedTarget) {
276 if (Target1 == Sema::CFT_Global && Target2 == Sema::CFT_Global) {
277 // TODO: this shouldn't happen, really. Methods cannot be marked __global__.
278 // Clang should detect this earlier and produce an error. Then this
279 // condition can be changed to an assertion.
283 if (Target1 == Sema::CFT_HostDevice) {
284 *ResolvedTarget = Target2;
285 } else if (Target2 == Sema::CFT_HostDevice) {
286 *ResolvedTarget = Target1;
287 } else if (Target1 != Target2) {
290 *ResolvedTarget = Target1;
296 bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
297 CXXSpecialMember CSM,
298 CXXMethodDecl *MemberDecl,
301 llvm::Optional<CUDAFunctionTarget> InferredTarget;
303 // We're going to invoke special member lookup; mark that these special
304 // members are called from this one, and not from its caller.
305 ContextRAII MethodContext(*this, MemberDecl);
307 // Look for special members in base classes that should be invoked from here.
308 // Infer the target of this member base on the ones it should call.
309 // Skip direct and indirect virtual bases for abstract classes.
310 llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
311 for (const auto &B : ClassDecl->bases()) {
312 if (!B.isVirtual()) {
317 if (!ClassDecl->isAbstract()) {
318 for (const auto &VB : ClassDecl->vbases()) {
319 Bases.push_back(&VB);
323 for (const auto *B : Bases) {
324 const RecordType *BaseType = B->getType()->getAs<RecordType>();
329 CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
330 Sema::SpecialMemberOverloadResult *SMOR =
331 LookupSpecialMember(BaseClassDecl, CSM,
332 /* ConstArg */ ConstRHS,
333 /* VolatileArg */ false,
334 /* RValueThis */ false,
335 /* ConstThis */ false,
336 /* VolatileThis */ false);
338 if (!SMOR || !SMOR->getMethod()) {
342 CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR->getMethod());
343 if (!InferredTarget.hasValue()) {
344 InferredTarget = BaseMethodTarget;
346 bool ResolutionError = resolveCalleeCUDATargetConflict(
347 InferredTarget.getValue(), BaseMethodTarget,
348 InferredTarget.getPointer());
349 if (ResolutionError) {
351 Diag(ClassDecl->getLocation(),
352 diag::note_implicit_member_target_infer_collision)
353 << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
355 MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
361 // Same as for bases, but now for special members of fields.
362 for (const auto *F : ClassDecl->fields()) {
363 if (F->isInvalidDecl()) {
367 const RecordType *FieldType =
368 Context.getBaseElementType(F->getType())->getAs<RecordType>();
373 CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
374 Sema::SpecialMemberOverloadResult *SMOR =
375 LookupSpecialMember(FieldRecDecl, CSM,
376 /* ConstArg */ ConstRHS && !F->isMutable(),
377 /* VolatileArg */ false,
378 /* RValueThis */ false,
379 /* ConstThis */ false,
380 /* VolatileThis */ false);
382 if (!SMOR || !SMOR->getMethod()) {
386 CUDAFunctionTarget FieldMethodTarget =
387 IdentifyCUDATarget(SMOR->getMethod());
388 if (!InferredTarget.hasValue()) {
389 InferredTarget = FieldMethodTarget;
391 bool ResolutionError = resolveCalleeCUDATargetConflict(
392 InferredTarget.getValue(), FieldMethodTarget,
393 InferredTarget.getPointer());
394 if (ResolutionError) {
396 Diag(ClassDecl->getLocation(),
397 diag::note_implicit_member_target_infer_collision)
398 << (unsigned)CSM << InferredTarget.getValue()
399 << FieldMethodTarget;
401 MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
407 if (InferredTarget.hasValue()) {
408 if (InferredTarget.getValue() == CFT_Device) {
409 MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
410 } else if (InferredTarget.getValue() == CFT_Host) {
411 MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
413 MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
414 MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
417 // If no target was inferred, mark this member as __host__ __device__;
418 // it's the least restrictive option that can be invoked from any target.
419 MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
420 MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));