//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// \brief This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
using namespace clang;

ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << "cudaConfigureCall");
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (Context)
      DeclRefExpr(ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return ActOnCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                       /*IsExecConfig=*/true);
}

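// As a sketch of what the function above handles (hypothetical user code,
// not part of this file): a launch such as
//
//   __global__ void kern(int *out);
//   kern<<<dim3(8), dim3(256)>>>(out);
//
// arrives here with the <<<...>>> arguments as ExecConfig, and the whole
// configuration is lowered into a call to the cudaConfigureCall declaration.
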
/// IdentifyCUDATarget - Determine the CUDA compilation target for this function
Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CFT_InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CFT_Global;

  if (D->hasAttr<CUDADeviceAttr>()) {
    if (D->hasAttr<CUDAHostAttr>())
      return CFT_HostDevice;
    return CFT_Device;
  } else if (D->hasAttr<CUDAHostAttr>()) {
    return CFT_Host;
  } else if (D->isImplicit()) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CFT_HostDevice;
  }

  return CFT_Host;
}

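// For example, these hypothetical declarations map to targets as follows:
//
//   __global__ void g();            // CFT_Global
//   __device__ int d();             // CFT_Device
//   __host__ __device__ int hd();   // CFT_HostDevice
//   int h();                        // CFT_Host (unattributed, non-implicit)
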
// * CUDA Call preference table
//
// F - from,
// T - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
// | F  | T  | Ph  | Pd  |  H  |
// |----+----+-----+-----+-----+
// | d  | d  | N   | N   | (c) |
// | d  | g  | --  | --  | (a) |
// | d  | h  | --  | --  | (e) |
// | d  | hd | HD  | HD  | (b) |
// | g  | d  | N   | N   | (c) |
// | g  | g  | --  | --  | (a) |
// | g  | h  | --  | --  | (e) |
// | g  | hd | HD  | HD  | (b) |
// | h  | d  | --  | --  | (e) |
// | h  | g  | N   | N   | (c) |
// | h  | h  | N   | N   | (c) |
// | h  | hd | HD  | HD  | (b) |
// | hd | d  | WS  | SS  | (d) |
// | hd | g  | SS  | --  |(d/a)|
// | hd | h  | SS  | WS  | (d) |
// | hd | hd | HD  | HD  | (b) |

Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");
  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
  CUDAFunctionTarget CallerTarget =
      (Caller != nullptr) ? IdentifyCUDATarget(Caller) : Sema::CFT_Host;

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CFT_Global &&
      (CallerTarget == CFT_Global || CallerTarget == CFT_Device ||
       (CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice)))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
  if (CalleeTarget == CFT_HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
    return CFP_Native;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CFT_HostDevice) {
    // It's OK to call a compilation-mode matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened.  TODO: Reject said calls earlier.
    return CFP_WrongSide;
  }

  // (e) Calling across device/host boundary is not something you should do.
  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}

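// A sketch of rows (d) and (e) of the table above (hypothetical user code):
//
//   __host__ int h();
//   __device__ int d();
//   __host__ __device__ int hd() {
//     return h();   // SameSide in host mode, WrongSide in device mode.
//   }
//   __device__ int bad() { return h(); }   // CFP_Never: device -> host.
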
template <typename T>
static void EraseUnwantedCUDAMatchesImpl(
    Sema &S, const FunctionDecl *Caller, llvm::SmallVectorImpl<T> &Matches,
    std::function<const FunctionDecl *(const T &)> FetchDecl) {
  if (Matches.size() <= 1)
    return;

  // Gets the CUDA function preference for a call from Caller to Match.
  auto GetCFP = [&](const T &Match) {
    return S.IdentifyCUDAPreference(Caller, FetchDecl(Match));
  };

  // Find the best call preference among the functions in Matches.
  Sema::CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
      Matches.begin(), Matches.end(),
      [&](const T &M1, const T &M2) { return GetCFP(M1) < GetCFP(M2); }));

  // Erase all functions with lower priority.
  Matches.erase(
      llvm::remove_if(Matches,
                      [&](const T &Match) { return GetCFP(Match) < BestCFP; }),
      Matches.end());
}

void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
                                    SmallVectorImpl<FunctionDecl *> &Matches) {
  EraseUnwantedCUDAMatchesImpl<FunctionDecl *>(
      *this, Caller, Matches, [](const FunctionDecl *item) { return item; });
}

void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
                                    SmallVectorImpl<DeclAccessPair> &Matches) {
  EraseUnwantedCUDAMatchesImpl<DeclAccessPair>(
      *this, Caller, Matches, [](const DeclAccessPair &item) {
        return dyn_cast<FunctionDecl>(item.getDecl());
      });
}

void Sema::EraseUnwantedCUDAMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  EraseUnwantedCUDAMatchesImpl<std::pair<DeclAccessPair, FunctionDecl *>>(
      *this, Caller, Matches,
      [](const std::pair<DeclAccessPair, FunctionDecl *> &item) {
        return dyn_cast<FunctionDecl>(item.second);
      });
}

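// For instance, given a __device__ caller and these hypothetical overloads:
//
//   __device__ int f(int);            // CFP_Native from the caller
//   __host__ __device__ int f(int);   // CFP_HostDevice from the caller
//
// only the first candidate survives the pruning above, because CFP_Native is
// the best preference in the set.
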
/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
                                Sema::CUDAFunctionTarget Target2,
                                Sema::CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != Sema::CFT_Global);
  assert(Target2 != Sema::CFT_Global);

  if (Target1 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}

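// Concretely: (HostDevice, Device) resolves to Device, (Device, Device)
// resolves to Device, and (Host, Device) reports a conflict.
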
bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMember CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  llvm::Optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  ContextRAII MethodContext(*this, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    for (const auto &VB : ClassDecl->vbases()) {
      Bases.push_back(&VB);
    }
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult *SMOR =
        LookupSpecialMember(BaseClassDecl, CSM,
                            /* ConstArg */ ConstRHS,
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR || !SMOR->getMethod()) {
      continue;
    }

    CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR->getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), BaseMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        Context.getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult *SMOR =
        LookupSpecialMember(FieldRecDecl, CSM,
                            /* ConstArg */ ConstRHS && !F->isMutable(),
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR || !SMOR->getMethod()) {
      continue;
    }

    CUDAFunctionTarget FieldMethodTarget =
        IdentifyCUDATarget(SMOR->getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), FieldMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue()
              << FieldMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  if (InferredTarget.hasValue()) {
    if (InferredTarget.getValue() == CFT_Device) {
      MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    } else if (InferredTarget.getValue() == CFT_Host) {
      MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
    } else {
      MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
      MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
    }
  } else {
    // If no target was inferred, mark this member as __host__ __device__;
    // it's the least restrictive option that can be invoked from any target.
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
  }

  return false;
}

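// For example, inference fails for a class like this hypothetical one, whose
// implicit default constructor would have to call both a __host__ and a
// __device__ member:
//
//   struct H { __host__ H(); };
//   struct D { __device__ D(); };
//   struct HD : H, D {};   // implicit HD::HD() gets CUDAInvalidTargetAttr
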
bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  if (!CD->isDefined() && CD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial constructor
  if (CD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The constructor function has been defined.
  // The constructor function has no parameters,
  // and the function body is an empty compound statement.
  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    return false;

  // Its class has no virtual functions and no virtual base classes.
  if (CD->getParent()->isDynamicClass())
    return false;

  // The only form of initializer allowed is an empty constructor.
  // This will recursively check all base classes and member initializers
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyCudaConstructor(Loc, CE->getConstructor());
        return false;
      }))
    return false;

  return true;
}

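// Under the rules above, a hypothetical constructor like
//
//   struct S { __device__ S() {} };
//
// is considered empty: the body is an empty compound statement, there are no
// parameters, no virtual bases or functions, and no initializers.
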
bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  // No destructor -> no problem.
  if (!DD)
    return true;

  if (!DD->isDefined() && DD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial destructor
  if (DD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The destructor function has been defined,
  // and the function body is an empty compound statement.
  if (!DD->hasTrivialBody())
    return false;

  const CXXRecordDecl *ClassDecl = DD->getParent();

  // Its class has no virtual functions and no virtual base classes.
  if (ClassDecl->isDynamicClass())
    return false;

  // Only empty destructors are allowed. This will recursively check
  // destructors for all base classes...
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  // ... and member fields.
  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
        if (CXXRecordDecl *RD = Field->getType()
                                    ->getBaseElementTypeUnsafe()
                                    ->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  return true;
}

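// The destructor analogue of the example above (hypothetical):
//
//   struct S { __device__ ~S() {} };
//
// is likewise empty, provided every base and field destructor involved is
// empty as well.
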
// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
//  * it is a variadic function (device-side variadic functions are not
//    allowed), or
//  * a __device__ function with this signature was already declared, in which
//    case we output an error, unless the __device__ decl is in a system
//    header, in which case we leave the constexpr function unattributed.
void Sema::maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "May be called only for CUDA compilations.");
  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
                       /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs).  This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
    NamedDecl *Match = *It;
    if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD->getName();
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
}
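
// For illustration, with -fcuda-host-device-constexpr (hypothetical user
// code): an unattributed declaration such as
//
//   constexpr int square(int x) { return x * x; }
//
// is treated as __host__ __device__, but stays unattributed if a conflicting
//
//   __device__ int square(int x);
//
// was already declared in a system header (and is an error if the __device__
// declaration is in user code).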
    483