contrib/llvm/tools/clang/lib/Sema/SemaCUDA.cpp

   1 //===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 /// \file
  10 /// \brief This file implements semantic analysis for CUDA constructs.
  11 ///
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "clang/Sema/Sema.h"
  15 #include "clang/AST/ASTContext.h"
  16 #include "clang/AST/Decl.h"
  17 #include "clang/Lex/Preprocessor.h"
  18 #include "clang/Sema/SemaDiagnostic.h"
  19 #include "llvm/ADT/Optional.h"
  20 #include "llvm/ADT/SmallVector.h"
  21 using namespace clang;
  22
  23 ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
  24                                          MultiExprArg ExecConfig,
  25                                          SourceLocation GGGLoc) {
  26   FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
  27   if (!ConfigDecl)
  28     return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
  29                      << "cudaConfigureCall");
  30   QualType ConfigQTy = ConfigDecl->getType();
  31
  32   DeclRefExpr *ConfigDR = new (Context)
  33       DeclRefExpr(ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  34   MarkFunctionReferenced(LLLLoc, ConfigDecl);
  35
  36   return ActOnCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
  37                        /*IsExecConfig=*/true);
  38 }
  39
  40 /// IdentifyCUDATarget - Determine the CUDA compilation target for this function
  41 Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
  42   if (D->hasAttr<CUDAInvalidTargetAttr>())
  43     return CFT_InvalidTarget;
  44
  45   if (D->hasAttr<CUDAGlobalAttr>())
  46     return CFT_Global;
  47
  48   if (D->hasAttr<CUDADeviceAttr>()) {
  49     if (D->hasAttr<CUDAHostAttr>())
  50       return CFT_HostDevice;
  51     return CFT_Device;
  52   } else if (D->hasAttr<CUDAHostAttr>()) {
  53     return CFT_Host;
  54   } else if (D->isImplicit()) {
  55     // Some implicit declarations (like intrinsic functions) are not marked.
  56     // Set the most lenient target on them for maximal flexibility.
  57     return CFT_HostDevice;
  58   }
  59
  60   return CFT_Host;
  61 }
  62
  63 // * CUDA Call preference table
  64 //
  65 // F - from,
  66 // T - to
  67 // Ph - preference in host mode
  68 // Pd - preference in device mode
  69 // H  - handled in (x)
  70 // Preferences: b-best, f-fallback, l-last resort, n-never.
  71 //
  72 // | F  | T  | Ph | Pd |  H  |
  73 // |----+----+----+----+-----+
  74 // | d  | d  | b  | b  | (b) |
  75 // | d  | g  | n  | n  | (a) |
  76 // | d  | h  | l  | l  | (e) |
  77 // | d  | hd | f  | f  | (c) |
  78 // | g  | d  | b  | b  | (b) |
  79 // | g  | g  | n  | n  | (a) |
  80 // | g  | h  | l  | l  | (e) |
  81 // | g  | hd | f  | f  | (c) |
  82 // | h  | d  | l  | l  | (e) |
  83 // | h  | g  | b  | b  | (b) |
  84 // | h  | h  | b  | b  | (b) |
  85 // | h  | hd | f  | f  | (c) |
  86 // | hd | d  | l  | f  | (d) |
  87 // | hd | g  | f  | n  |(d/a)|
  88 // | hd | h  | f  | l  | (d) |
  89 // | hd | hd | b  | b  | (b) |
  90
  91 Sema::CUDAFunctionPreference
  92 Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
  93                              const FunctionDecl *Callee) {
  94   assert(getLangOpts().CUDATargetOverloads &&
  95          "Should not be called w/o enabled target overloads.");
  96
  97   assert(Callee && "Callee must be valid.");
  98   CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
  99   CUDAFunctionTarget CallerTarget =
 100       (Caller != nullptr) ? IdentifyCUDATarget(Caller) : Sema::CFT_Host;
 101
 102   // If one of the targets is invalid, the check always fails, no matter what
 103   // the other target is.
 104   if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
 105     return CFP_Never;
 106
 107   // (a) Can't call global from some contexts until we support CUDA's
 108   // dynamic parallelism.
 109   if (CalleeTarget == CFT_Global &&
 110       (CallerTarget == CFT_Global || CallerTarget == CFT_Device ||
 111        (CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice)))
 112     return CFP_Never;
 113
 114   // (b) Best case scenarios
 115   if (CalleeTarget == CallerTarget ||
 116       (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
 117       (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
 118     return CFP_Best;
 119
 120   // (c) Calling HostDevice is OK as a fallback that works for everyone.
 121   if (CalleeTarget == CFT_HostDevice)
 122     return CFP_Fallback;
 123
 124   // Figure out what should be returned 'last resort' cases. Normally
 125   // those would not be allowed, but we'll consider them if
 126   // CUDADisableTargetCallChecks is true.
 127   CUDAFunctionPreference QuestionableResult =
 128       getLangOpts().CUDADisableTargetCallChecks ? CFP_LastResort : CFP_Never;
 129
 130   // (d) HostDevice behavior depends on compilation mode.
 131   if (CallerTarget == CFT_HostDevice) {
 132     // Calling a function that matches compilation mode is OK.
 133     // Calling a function from the other side is frowned upon.
 134     if (getLangOpts().CUDAIsDevice)
 135       return CalleeTarget == CFT_Device ? CFP_Fallback : QuestionableResult;
 136     else
 137       return (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)
 138                  ? CFP_Fallback
 139                  : QuestionableResult;
 140   }
 141
 142   // (e) Calling across device/host boundary is not something you should do.
 143   if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
 144       (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
 145       (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
 146     return QuestionableResult;
 147
 148   llvm_unreachable("All cases should've been handled by now.");
 149 }
 150
 151 bool Sema::CheckCUDATarget(const FunctionDecl *Caller,
 152                            const FunctionDecl *Callee) {
 153   // With target overloads enabled, we only disallow calling
 154   // combinations with CFP_Never.
 155   if (getLangOpts().CUDATargetOverloads)
 156     return IdentifyCUDAPreference(Caller,Callee) == CFP_Never;
 157
 158   // The CUDADisableTargetCallChecks short-circuits this check: we assume all
 159   // cross-target calls are valid.
 160   if (getLangOpts().CUDADisableTargetCallChecks)
 161     return false;
 162
 163   CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller),
 164                      CalleeTarget = IdentifyCUDATarget(Callee);
 165
 166   // If one of the targets is invalid, the check always fails, no matter what
 167   // the other target is.
 168   if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
 169     return true;
 170
 171   // CUDA B.1.1 "The __device__ qualifier declares a function that is [...]
 172   // Callable from the device only."
 173   if (CallerTarget == CFT_Host && CalleeTarget == CFT_Device)
 174     return true;
 175
 176   // CUDA B.1.2 "The __global__ qualifier declares a function that is [...]
 177   // Callable from the host only."
 178   // CUDA B.1.3 "The __host__ qualifier declares a function that is [...]
 179   // Callable from the host only."
 180   if ((CallerTarget == CFT_Device || CallerTarget == CFT_Global) &&
 181       (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global))
 182     return true;
 183
 184   // CUDA B.1.3 "The __device__ and __host__ qualifiers can be used together
 185   // however, in which case the function is compiled for both the host and the
 186   // device. The __CUDA_ARCH__ macro [...] can be used to differentiate code
 187   // paths between host and device."
 188   if (CallerTarget == CFT_HostDevice && CalleeTarget != CFT_HostDevice) {
 189     // If the caller is implicit then the check always passes.
 190     if (Caller->isImplicit()) return false;
 191
 192     bool InDeviceMode = getLangOpts().CUDAIsDevice;
 193     if (!InDeviceMode && CalleeTarget != CFT_Host)
 194         return true;
 195     if (InDeviceMode && CalleeTarget != CFT_Device) {
 196       // Allow host device functions to call host functions if explicitly
 197       // requested.
 198       if (CalleeTarget == CFT_Host &&
 199           getLangOpts().CUDAAllowHostCallsFromHostDevice) {
 200         Diag(Caller->getLocation(),
 201              diag::warn_host_calls_from_host_device)
 202             << Callee->getNameAsString() << Caller->getNameAsString();
 203         return false;
 204       }
 205
 206       return true;
 207     }
 208   }
 209
 210   return false;
 211 }
 212
 213 template <typename T, typename FetchDeclFn>
 214 static void EraseUnwantedCUDAMatchesImpl(Sema &S, const FunctionDecl *Caller,
 215                                          llvm::SmallVectorImpl<T> &Matches,
 216                                          FetchDeclFn FetchDecl) {
 217   assert(S.getLangOpts().CUDATargetOverloads &&
 218          "Should not be called w/o enabled target overloads.");
 219   if (Matches.size() <= 1)
 220     return;
 221
 222   // Find the best call preference among the functions in Matches.
 223   Sema::CUDAFunctionPreference P, BestCFP = Sema::CFP_Never;
 224   for (auto const &Match : Matches) {
 225     P = S.IdentifyCUDAPreference(Caller, FetchDecl(Match));
 226     if (P > BestCFP)
 227       BestCFP = P;
 228   }
 229
 230   // Erase all functions with lower priority.
 231   for (unsigned I = 0, N = Matches.size(); I != N;)
 232     if (S.IdentifyCUDAPreference(Caller, FetchDecl(Matches[I])) < BestCFP) {
 233       Matches[I] = Matches[--N];
 234       Matches.resize(N);
 235     } else {
 236       ++I;
 237     }
 238 }
 239
 240 void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
 241                                     SmallVectorImpl<FunctionDecl *> &Matches){
 242   EraseUnwantedCUDAMatchesImpl<FunctionDecl *>(
 243       *this, Caller, Matches, [](const FunctionDecl *item) { return item; });
 244 }
 245
 246 void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
 247                                     SmallVectorImpl<DeclAccessPair> &Matches) {
 248   EraseUnwantedCUDAMatchesImpl<DeclAccessPair>(
 249       *this, Caller, Matches, [](const DeclAccessPair &item) {
 250         return dyn_cast<FunctionDecl>(item.getDecl());
 251       });
 252 }
 253
 254 void Sema::EraseUnwantedCUDAMatches(
 255     const FunctionDecl *Caller,
 256     SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches){
 257   EraseUnwantedCUDAMatchesImpl<std::pair<DeclAccessPair, FunctionDecl *>>(
 258       *this, Caller, Matches,
 259       [](const std::pair<DeclAccessPair, FunctionDecl *> &item) {
 260         return dyn_cast<FunctionDecl>(item.second);
 261       });
 262 }
 263
 264 /// When an implicitly-declared special member has to invoke more than one
 265 /// base/field special member, conflicts may occur in the targets of these
 266 /// members. For example, if one base's member __host__ and another's is
 267 /// __device__, it's a conflict.
 268 /// This function figures out if the given targets \param Target1 and
 269 /// \param Target2 conflict, and if they do not it fills in
 270 /// \param ResolvedTarget with a target that resolves for both calls.
 271 /// \return true if there's a conflict, false otherwise.
 272 static bool
 273 resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
 274                                 Sema::CUDAFunctionTarget Target2,
 275                                 Sema::CUDAFunctionTarget *ResolvedTarget) {
 276   if (Target1 == Sema::CFT_Global && Target2 == Sema::CFT_Global) {
 277     // TODO: this shouldn't happen, really. Methods cannot be marked __global__.
 278     // Clang should detect this earlier and produce an error. Then this
 279     // condition can be changed to an assertion.
 280     return true;
 281   }
 282
 283   if (Target1 == Sema::CFT_HostDevice) {
 284     *ResolvedTarget = Target2;
 285   } else if (Target2 == Sema::CFT_HostDevice) {
 286     *ResolvedTarget = Target1;
 287   } else if (Target1 != Target2) {
 288     return true;
 289   } else {
 290     *ResolvedTarget = Target1;
 291   }
 292
 293   return false;
 294 }
 295
 296 bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
 297                                                    CXXSpecialMember CSM,
 298                                                    CXXMethodDecl *MemberDecl,
 299                                                    bool ConstRHS,
 300                                                    bool Diagnose) {
 301   llvm::Optional<CUDAFunctionTarget> InferredTarget;
 302
 303   // We're going to invoke special member lookup; mark that these special
 304   // members are called from this one, and not from its caller.
 305   ContextRAII MethodContext(*this, MemberDecl);
 306
 307   // Look for special members in base classes that should be invoked from here.
 308   // Infer the target of this member base on the ones it should call.
 309   // Skip direct and indirect virtual bases for abstract classes.
 310   llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
 311   for (const auto &B : ClassDecl->bases()) {
 312     if (!B.isVirtual()) {
 313       Bases.push_back(&B);
 314     }
 315   }
 316
 317   if (!ClassDecl->isAbstract()) {
 318     for (const auto &VB : ClassDecl->vbases()) {
 319       Bases.push_back(&VB);
 320     }
 321   }
 322
 323   for (const auto *B : Bases) {
 324     const RecordType *BaseType = B->getType()->getAs<RecordType>();
 325     if (!BaseType) {
 326       continue;
 327     }
 328
 329     CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
 330     Sema::SpecialMemberOverloadResult *SMOR =
 331         LookupSpecialMember(BaseClassDecl, CSM,
 332                             /* ConstArg */ ConstRHS,
 333                             /* VolatileArg */ false,
 334                             /* RValueThis */ false,
 335                             /* ConstThis */ false,
 336                             /* VolatileThis */ false);
 337
 338     if (!SMOR || !SMOR->getMethod()) {
 339       continue;
 340     }
 341
 342     CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR->getMethod());
 343     if (!InferredTarget.hasValue()) {
 344       InferredTarget = BaseMethodTarget;
 345     } else {
 346       bool ResolutionError = resolveCalleeCUDATargetConflict(
 347           InferredTarget.getValue(), BaseMethodTarget,
 348           InferredTarget.getPointer());
 349       if (ResolutionError) {
 350         if (Diagnose) {
 351           Diag(ClassDecl->getLocation(),
 352                diag::note_implicit_member_target_infer_collision)
 353               << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
 354         }
 355         MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
 356         return true;
 357       }
 358     }
 359   }
 360
 361   // Same as for bases, but now for special members of fields.
 362   for (const auto *F : ClassDecl->fields()) {
 363     if (F->isInvalidDecl()) {
 364       continue;
 365     }
 366
 367     const RecordType *FieldType =
 368         Context.getBaseElementType(F->getType())->getAs<RecordType>();
 369     if (!FieldType) {
 370       continue;
 371     }
 372
 373     CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
 374     Sema::SpecialMemberOverloadResult *SMOR =
 375         LookupSpecialMember(FieldRecDecl, CSM,
 376                             /* ConstArg */ ConstRHS && !F->isMutable(),
 377                             /* VolatileArg */ false,
 378                             /* RValueThis */ false,
 379                             /* ConstThis */ false,
 380                             /* VolatileThis */ false);
 381
 382     if (!SMOR || !SMOR->getMethod()) {
 383       continue;
 384     }
 385
 386     CUDAFunctionTarget FieldMethodTarget =
 387         IdentifyCUDATarget(SMOR->getMethod());
 388     if (!InferredTarget.hasValue()) {
 389       InferredTarget = FieldMethodTarget;
 390     } else {
 391       bool ResolutionError = resolveCalleeCUDATargetConflict(
 392           InferredTarget.getValue(), FieldMethodTarget,
 393           InferredTarget.getPointer());
 394       if (ResolutionError) {
 395         if (Diagnose) {
 396           Diag(ClassDecl->getLocation(),
 397                diag::note_implicit_member_target_infer_collision)
 398               << (unsigned)CSM << InferredTarget.getValue()
 399               << FieldMethodTarget;
 400         }
 401         MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
 402         return true;
 403       }
 404     }
 405   }
 406
 407   if (InferredTarget.hasValue()) {
 408     if (InferredTarget.getValue() == CFT_Device) {
 409       MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
 410     } else if (InferredTarget.getValue() == CFT_Host) {
 411       MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
 412     } else {
 413       MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
 414       MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
 415     }
 416   } else {
 417     // If no target was inferred, mark this member as __host__ __device__;
 418     // it's the least restrictive option that can be invoked from any target.
 419     MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
 420     MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
 421   }
 422
 423   return false;
 424 }