Home | History | Annotate | Download | only in Basic
      1 //===--- IdentifierTable.h - Hash table for identifier lookup ---*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 ///
     10 /// \file
     11 /// \brief Defines the clang::IdentifierInfo, clang::IdentifierTable, and
     12 /// clang::Selector interfaces.
     13 ///
     14 //===----------------------------------------------------------------------===//
     15 
     16 #ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
     17 #define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
     18 
     19 #include "clang/Basic/LLVM.h"
     20 #include "clang/Basic/TokenKinds.h"
     21 #include "llvm/ADT/SmallString.h"
     22 #include "llvm/ADT/StringMap.h"
     23 #include "llvm/ADT/StringRef.h"
     24 #include "llvm/Support/Allocator.h"
     25 #include <cassert>
     26 #include <cstddef>
     27 #include <cstdint>
     28 #include <cstring>
     29 #include <new>
     30 #include <string>
     31 #include <utility>
     32 
     33 namespace llvm {
     34 
     35   template <typename T> struct DenseMapInfo;
     36 
     37 } // end namespace llvm
     38 
     39 namespace clang {
     40 
     41   class LangOptions;
     42   class IdentifierInfo;
     43   class IdentifierTable;
     44   class SourceLocation;
     45   class MultiKeywordSelector; // private class used by Selector
     46   class DeclarationName;      // AST class that stores declaration names
     47 
     48   /// \brief A simple pair of identifier info and location.
     49   typedef std::pair<IdentifierInfo*, SourceLocation> IdentifierLocPair;
     50 
     51 /// One of these records is kept for each identifier that
     52 /// is lexed.  This contains information about whether the token was \#define'd,
     53 /// is a language keyword, or if it is a front-end token of some sort (e.g. a
     54 /// variable or function name).  The preprocessor keeps this information in a
     55 /// set, and all tok::identifier tokens have a pointer to one of these.
     56 class IdentifierInfo {
     57   friend class IdentifierTable;
     58 
     59   unsigned TokenID            : 9; // Front-end token ID or tok::identifier.
     60   // Objective-C keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
     61   // First NUM_OBJC_KEYWORDS values are for Objective-C, the remaining values
     62   // are for builtins.
     63   unsigned ObjCOrBuiltinID    :13;
     64   bool HasMacro               : 1; // True if there is a #define for this.
     65   bool HadMacro               : 1; // True if there was a #define for this.
     66   bool IsExtension            : 1; // True if identifier is a lang extension.
     67   bool IsFutureCompatKeyword  : 1; // True if identifier is a keyword in a
     68                                    // newer Standard or proposed Standard.
     69   bool IsPoisoned             : 1; // True if identifier is poisoned.
     70   bool IsCPPOperatorKeyword   : 1; // True if ident is a C++ operator keyword.
     71   bool NeedsHandleIdentifier  : 1; // See "RecomputeNeedsHandleIdentifier".
     72   bool IsFromAST              : 1; // True if identifier was loaded (at least
     73                                    // partially) from an AST file.
     74   bool ChangedAfterLoad       : 1; // True if identifier has changed from the
     75                                    // definition loaded from an AST file.
     76   bool FEChangedAfterLoad     : 1; // True if identifier's frontend information
     77                                    // has changed from the definition loaded
     78                                    // from an AST file.
     79   bool RevertedTokenID        : 1; // True if revertTokenIDToIdentifier was
     80                                    // called.
     81   bool OutOfDate              : 1; // True if there may be additional
     82                                    // information about this identifier
     83                                    // stored externally.
     84   bool IsModulesImport        : 1; // True if this is the 'import' contextual
     85                                    // keyword.
     86   // 29 bit left in 64-bit word.
     87 
     88   void *FETokenInfo;               // Managed by the language front-end.
     89   llvm::StringMapEntry<IdentifierInfo*> *Entry;
     90 
     91 public:
     92   IdentifierInfo();
     93   IdentifierInfo(const IdentifierInfo &) = delete;
     94   IdentifierInfo &operator=(const IdentifierInfo &) = delete;
     95 
     96   /// \brief Return true if this is the identifier for the specified string.
     97   ///
     98   /// This is intended to be used for string literals only: II->isStr("foo").
     99   template <std::size_t StrLen>
    100   bool isStr(const char (&Str)[StrLen]) const {
    101     return getLength() == StrLen-1 &&
    102            memcmp(getNameStart(), Str, StrLen-1) == 0;
    103   }
    104 
    105   /// \brief Return the beginning of the actual null-terminated string for this
    106   /// identifier.
    107   ///
    108   const char *getNameStart() const {
    109     if (Entry) return Entry->getKeyData();
    110     // FIXME: This is gross. It would be best not to embed specific details
    111     // of the PTH file format here.
    112     // The 'this' pointer really points to a
    113     // std::pair<IdentifierInfo, const char*>, where internal pointer
    114     // points to the external string data.
    115     typedef std::pair<IdentifierInfo, const char*> actualtype;
    116     return ((const actualtype*) this)->second;
    117   }
    118 
    119   /// \brief Efficiently return the length of this identifier info.
    120   ///
    121   unsigned getLength() const {
    122     if (Entry) return Entry->getKeyLength();
    123     // FIXME: This is gross. It would be best not to embed specific details
    124     // of the PTH file format here.
    125     // The 'this' pointer really points to a
    126     // std::pair<IdentifierInfo, const char*>, where internal pointer
    127     // points to the external string data.
    128     typedef std::pair<IdentifierInfo, const char*> actualtype;
    129     const char* p = ((const actualtype*) this)->second - 2;
    130     return (((unsigned) p[0]) | (((unsigned) p[1]) << 8)) - 1;
    131   }
    132 
    133   /// \brief Return the actual identifier string.
    134   StringRef getName() const {
    135     return StringRef(getNameStart(), getLength());
    136   }
    137 
    138   /// \brief Return true if this identifier is \#defined to some other value.
    139   /// \note The current definition may be in a module and not currently visible.
    140   bool hasMacroDefinition() const {
    141     return HasMacro;
    142   }
    143   void setHasMacroDefinition(bool Val) {
    144     if (HasMacro == Val) return;
    145 
    146     HasMacro = Val;
    147     if (Val) {
    148       NeedsHandleIdentifier = true;
    149       HadMacro = true;
    150     } else {
    151       RecomputeNeedsHandleIdentifier();
    152     }
    153   }
    154   /// \brief Returns true if this identifier was \#defined to some value at any
    155   /// moment. In this case there should be an entry for the identifier in the
    156   /// macro history table in Preprocessor.
    157   bool hadMacroDefinition() const {
    158     return HadMacro;
    159   }
    160 
    161   /// If this is a source-language token (e.g. 'for'), this API
    162   /// can be used to cause the lexer to map identifiers to source-language
    163   /// tokens.
    164   tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; }
    165 
    166   /// \brief True if revertTokenIDToIdentifier() was called.
    167   bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; }
    168 
    169   /// \brief Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2
    170   /// compatibility.
    171   ///
    172   /// TokenID is normally read-only but there are 2 instances where we revert it
    173   /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
    174   /// using this method so we can inform serialization about it.
    175   void revertTokenIDToIdentifier() {
    176     assert(TokenID != tok::identifier && "Already at tok::identifier");
    177     TokenID = tok::identifier;
    178     RevertedTokenID = true;
    179   }
    180   void revertIdentifierToTokenID(tok::TokenKind TK) {
    181     assert(TokenID == tok::identifier && "Should be at tok::identifier");
    182     TokenID = TK;
    183     RevertedTokenID = false;
    184   }
    185 
    186   /// \brief Return the preprocessor keyword ID for this identifier.
    187   ///
    188   /// For example, "define" will return tok::pp_define.
    189   tok::PPKeywordKind getPPKeywordID() const;
    190 
    191   /// \brief Return the Objective-C keyword ID for the this identifier.
    192   ///
    193   /// For example, 'class' will return tok::objc_class if ObjC is enabled.
    194   tok::ObjCKeywordKind getObjCKeywordID() const {
    195     if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS)
    196       return tok::ObjCKeywordKind(ObjCOrBuiltinID);
    197     else
    198       return tok::objc_not_keyword;
    199   }
    200   void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
    201 
    202   /// \brief True if setNotBuiltin() was called.
    203   bool hasRevertedBuiltin() const {
    204     return ObjCOrBuiltinID == tok::NUM_OBJC_KEYWORDS;
    205   }
    206 
    207   /// \brief Revert the identifier to a non-builtin identifier. We do this if
    208   /// the name of a known builtin library function is used to declare that
    209   /// function, but an unexpected type is specified.
    210   void revertBuiltin() {
    211     setBuiltinID(0);
    212   }
    213 
    214   /// \brief Return a value indicating whether this is a builtin function.
    215   ///
    216   /// 0 is not-built-in. 1+ are specific builtin functions.
    217   unsigned getBuiltinID() const {
    218     if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS)
    219       return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS;
    220     else
    221       return 0;
    222   }
    223   void setBuiltinID(unsigned ID) {
    224     ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS;
    225     assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID
    226            && "ID too large for field!");
    227   }
    228 
    229   unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; }
    230   void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; }
    231 
    232   /// get/setExtension - Initialize information about whether or not this
    233   /// language token is an extension.  This controls extension warnings, and is
    234   /// only valid if a custom token ID is set.
    235   bool isExtensionToken() const { return IsExtension; }
    236   void setIsExtensionToken(bool Val) {
    237     IsExtension = Val;
    238     if (Val)
    239       NeedsHandleIdentifier = true;
    240     else
    241       RecomputeNeedsHandleIdentifier();
    242   }
    243 
    244   /// is/setIsFutureCompatKeyword - Initialize information about whether or not
    245   /// this language token is a keyword in a newer or proposed Standard. This
    246   /// controls compatibility warnings, and is only true when not parsing the
    247   /// corresponding Standard. Once a compatibility problem has been diagnosed
    248   /// with this keyword, the flag will be cleared.
    249   bool isFutureCompatKeyword() const { return IsFutureCompatKeyword; }
    250   void setIsFutureCompatKeyword(bool Val) {
    251     IsFutureCompatKeyword = Val;
    252     if (Val)
    253       NeedsHandleIdentifier = true;
    254     else
    255       RecomputeNeedsHandleIdentifier();
    256   }
    257 
    258   /// setIsPoisoned - Mark this identifier as poisoned.  After poisoning, the
    259   /// Preprocessor will emit an error every time this token is used.
    260   void setIsPoisoned(bool Value = true) {
    261     IsPoisoned = Value;
    262     if (Value)
    263       NeedsHandleIdentifier = true;
    264     else
    265       RecomputeNeedsHandleIdentifier();
    266   }
    267 
    268   /// \brief Return true if this token has been poisoned.
    269   bool isPoisoned() const { return IsPoisoned; }
    270 
    271   /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether
    272   /// this identifier is a C++ alternate representation of an operator.
    273   void setIsCPlusPlusOperatorKeyword(bool Val = true) {
    274     IsCPPOperatorKeyword = Val;
    275   }
    276   bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
    277 
    278   /// \brief Return true if this token is a keyword in the specified language.
    279   bool isKeyword(const LangOptions &LangOpts) const;
    280 
    281   /// \brief Return true if this token is a C++ keyword in the specified
    282   /// language.
    283   bool isCPlusPlusKeyword(const LangOptions &LangOpts) const;
    284 
    285   /// getFETokenInfo/setFETokenInfo - The language front-end is allowed to
    286   /// associate arbitrary metadata with this token.
    287   template<typename T>
    288   T *getFETokenInfo() const { return static_cast<T*>(FETokenInfo); }
    289   void setFETokenInfo(void *T) { FETokenInfo = T; }
    290 
    291   /// \brief Return true if the Preprocessor::HandleIdentifier must be called
    292   /// on a token of this identifier.
    293   ///
    294   /// If this returns false, we know that HandleIdentifier will not affect
    295   /// the token.
    296   bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; }
    297 
    298   /// \brief Return true if the identifier in its current state was loaded
    299   /// from an AST file.
    300   bool isFromAST() const { return IsFromAST; }
    301 
    302   void setIsFromAST() { IsFromAST = true; }
    303 
    304   /// \brief Determine whether this identifier has changed since it was loaded
    305   /// from an AST file.
    306   bool hasChangedSinceDeserialization() const {
    307     return ChangedAfterLoad;
    308   }
    309 
    310   /// \brief Note that this identifier has changed since it was loaded from
    311   /// an AST file.
    312   void setChangedSinceDeserialization() {
    313     ChangedAfterLoad = true;
    314   }
    315 
    316   /// \brief Determine whether the frontend token information for this
    317   /// identifier has changed since it was loaded from an AST file.
    318   bool hasFETokenInfoChangedSinceDeserialization() const {
    319     return FEChangedAfterLoad;
    320   }
    321 
    322   /// \brief Note that the frontend token information for this identifier has
    323   /// changed since it was loaded from an AST file.
    324   void setFETokenInfoChangedSinceDeserialization() {
    325     FEChangedAfterLoad = true;
    326   }
    327 
    328   /// \brief Determine whether the information for this identifier is out of
    329   /// date with respect to the external source.
    330   bool isOutOfDate() const { return OutOfDate; }
    331 
    332   /// \brief Set whether the information for this identifier is out of
    333   /// date with respect to the external source.
    334   void setOutOfDate(bool OOD) {
    335     OutOfDate = OOD;
    336     if (OOD)
    337       NeedsHandleIdentifier = true;
    338     else
    339       RecomputeNeedsHandleIdentifier();
    340   }
    341 
    342   /// \brief Determine whether this is the contextual keyword \c import.
    343   bool isModulesImport() const { return IsModulesImport; }
    344 
    345   /// \brief Set whether this identifier is the contextual keyword \c import.
    346   void setModulesImport(bool I) {
    347     IsModulesImport = I;
    348     if (I)
    349       NeedsHandleIdentifier = true;
    350     else
    351       RecomputeNeedsHandleIdentifier();
    352   }
    353 
    354   /// Return true if this identifier is an editor placeholder.
    355   ///
    356   /// Editor placeholders are produced by the code-completion engine and are
    357   /// represented as characters between '<#' and '#>' in the source code. An
    358   /// example of auto-completed call with a placeholder parameter is shown
    359   /// below:
    360   /// \code
    361   ///   function(<#int x#>);
    362   /// \endcode
    363   bool isEditorPlaceholder() const {
    364     return getName().startswith("<#") && getName().endswith("#>");
    365   }
    366 
    367   /// \brief Provide less than operator for lexicographical sorting.
    368   bool operator<(const IdentifierInfo &RHS) const {
    369     return getName() < RHS.getName();
    370   }
    371 
    372 private:
    373   /// The Preprocessor::HandleIdentifier does several special (but rare)
    374   /// things to identifiers of various sorts.  For example, it changes the
    375   /// \c for keyword token from tok::identifier to tok::for.
    376   ///
    377   /// This method is very tied to the definition of HandleIdentifier.  Any
    378   /// change to it should be reflected here.
    379   void RecomputeNeedsHandleIdentifier() {
    380     NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() ||
    381                             isExtensionToken() || isFutureCompatKeyword() ||
    382                             isOutOfDate() || isModulesImport();
    383   }
    384 };
    385 
    386 /// \brief An RAII object for [un]poisoning an identifier within a scope.
    387 ///
    388 /// \p II is allowed to be null, in which case objects of this type have
    389 /// no effect.
    390 class PoisonIdentifierRAIIObject {
    391   IdentifierInfo *const II;
    392   const bool OldValue;
    393 
    394 public:
    395   PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue)
    396     : II(II), OldValue(II ? II->isPoisoned() : false) {
    397     if(II)
    398       II->setIsPoisoned(NewValue);
    399   }
    400 
    401   ~PoisonIdentifierRAIIObject() {
    402     if(II)
    403       II->setIsPoisoned(OldValue);
    404   }
    405 };
    406 
    407 /// \brief An iterator that walks over all of the known identifiers
    408 /// in the lookup table.
    409 ///
    410 /// Since this iterator uses an abstract interface via virtual
    411 /// functions, it uses an object-oriented interface rather than the
    412 /// more standard C++ STL iterator interface. In this OO-style
    413 /// iteration, the single function \c Next() provides dereference,
    414 /// advance, and end-of-sequence checking in a single
    415 /// operation. Subclasses of this iterator type will provide the
    416 /// actual functionality.
    417 class IdentifierIterator {
    418 protected:
    419   IdentifierIterator() = default;
    420 
    421 public:
    422   IdentifierIterator(const IdentifierIterator &) = delete;
    423   IdentifierIterator &operator=(const IdentifierIterator &) = delete;
    424 
    425   virtual ~IdentifierIterator();
    426 
    427   /// \brief Retrieve the next string in the identifier table and
    428   /// advances the iterator for the following string.
    429   ///
    430   /// \returns The next string in the identifier table. If there is
    431   /// no such string, returns an empty \c StringRef.
    432   virtual StringRef Next() = 0;
    433 };
    434 
    435 /// \brief Provides lookups to, and iteration over, IdentiferInfo objects.
    436 class IdentifierInfoLookup {
    437 public:
    438   virtual ~IdentifierInfoLookup();
    439 
    440   /// \brief Return the IdentifierInfo for the specified named identifier.
    441   ///
    442   /// Unlike the version in IdentifierTable, this returns a pointer instead
    443   /// of a reference.  If the pointer is null then the IdentifierInfo cannot
    444   /// be found.
    445   virtual IdentifierInfo* get(StringRef Name) = 0;
    446 
    447   /// \brief Retrieve an iterator into the set of all identifiers
    448   /// known to this identifier lookup source.
    449   ///
    450   /// This routine provides access to all of the identifiers known to
    451   /// the identifier lookup, allowing access to the contents of the
    452   /// identifiers without introducing the overhead of constructing
    453   /// IdentifierInfo objects for each.
    454   ///
    455   /// \returns A new iterator into the set of known identifiers. The
    456   /// caller is responsible for deleting this iterator.
    457   virtual IdentifierIterator *getIdentifiers();
    458 };
    459 
    460 /// \brief Implements an efficient mapping from strings to IdentifierInfo nodes.
    461 ///
    462 /// This has no other purpose, but this is an extremely performance-critical
    463 /// piece of the code, as each occurrence of every identifier goes through
    464 /// here when lexed.
    465 class IdentifierTable {
    466   // Shark shows that using MallocAllocator is *much* slower than using this
    467   // BumpPtrAllocator!
    468   typedef llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator> HashTableTy;
    469   HashTableTy HashTable;
    470 
    471   IdentifierInfoLookup* ExternalLookup;
    472 
    473 public:
    474   /// \brief Create the identifier table, populating it with info about the
    475   /// language keywords for the language specified by \p LangOpts.
    476   IdentifierTable(const LangOptions &LangOpts,
    477                   IdentifierInfoLookup* externalLookup = nullptr);
    478 
    479   /// \brief Set the external identifier lookup mechanism.
    480   void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
    481     ExternalLookup = IILookup;
    482   }
    483 
    484   /// \brief Retrieve the external identifier lookup object, if any.
    485   IdentifierInfoLookup *getExternalIdentifierLookup() const {
    486     return ExternalLookup;
    487   }
    488 
    489   llvm::BumpPtrAllocator& getAllocator() {
    490     return HashTable.getAllocator();
    491   }
    492 
    493   /// \brief Return the identifier token info for the specified named
    494   /// identifier.
    495   IdentifierInfo &get(StringRef Name) {
    496     auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
    497 
    498     IdentifierInfo *&II = Entry.second;
    499     if (II) return *II;
    500 
    501     // No entry; if we have an external lookup, look there first.
    502     if (ExternalLookup) {
    503       II = ExternalLookup->get(Name);
    504       if (II)
    505         return *II;
    506     }
    507 
    508     // Lookups failed, make a new IdentifierInfo.
    509     void *Mem = getAllocator().Allocate<IdentifierInfo>();
    510     II = new (Mem) IdentifierInfo();
    511 
    512     // Make sure getName() knows how to find the IdentifierInfo
    513     // contents.
    514     II->Entry = &Entry;
    515 
    516     return *II;
    517   }
    518 
    519   IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) {
    520     IdentifierInfo &II = get(Name);
    521     II.TokenID = TokenCode;
    522     assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large");
    523     return II;
    524   }
    525 
    526   /// \brief Gets an IdentifierInfo for the given name without consulting
    527   ///        external sources.
    528   ///
    529   /// This is a version of get() meant for external sources that want to
    530   /// introduce or modify an identifier. If they called get(), they would
    531   /// likely end up in a recursion.
    532   IdentifierInfo &getOwn(StringRef Name) {
    533     auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
    534 
    535     IdentifierInfo *&II = Entry.second;
    536     if (II)
    537       return *II;
    538 
    539     // Lookups failed, make a new IdentifierInfo.
    540     void *Mem = getAllocator().Allocate<IdentifierInfo>();
    541     II = new (Mem) IdentifierInfo();
    542 
    543     // Make sure getName() knows how to find the IdentifierInfo
    544     // contents.
    545     II->Entry = &Entry;
    546 
    547     // If this is the 'import' contextual keyword, mark it as such.
    548     if (Name.equals("import"))
    549       II->setModulesImport(true);
    550 
    551     return *II;
    552   }
    553 
    554   typedef HashTableTy::const_iterator iterator;
    555   typedef HashTableTy::const_iterator const_iterator;
    556 
    557   iterator begin() const { return HashTable.begin(); }
    558   iterator end() const   { return HashTable.end(); }
    559   unsigned size() const  { return HashTable.size(); }
    560 
    561   /// \brief Print some statistics to stderr that indicate how well the
    562   /// hashing is doing.
    563   void PrintStats() const;
    564 
    565   void AddKeywords(const LangOptions &LangOpts);
    566 };
    567 
    568 /// \brief A family of Objective-C methods.
    569 ///
    570 /// These families have no inherent meaning in the language, but are
    571 /// nonetheless central enough in the existing implementations to
    572 /// merit direct AST support.  While, in theory, arbitrary methods can
    573 /// be considered to form families, we focus here on the methods
    574 /// involving allocation and retain-count management, as these are the
    575 /// most "core" and the most likely to be useful to diverse clients
    576 /// without extra information.
    577 ///
    578 /// Both selectors and actual method declarations may be classified
    579 /// into families.  Method families may impose additional restrictions
    580 /// beyond their selector name; for example, a method called '_init'
    581 /// that returns void is not considered to be in the 'init' family
    582 /// (but would be if it returned 'id').  It is also possible to
    583 /// explicitly change or remove a method's family.  Therefore the
    584 /// method's family should be considered the single source of truth.
    585 enum ObjCMethodFamily {
    586   /// \brief No particular method family.
    587   OMF_None,
    588 
    589   // Selectors in these families may have arbitrary arity, may be
    590   // written with arbitrary leading underscores, and may have
    591   // additional CamelCase "words" in their first selector chunk
    592   // following the family name.
    593   OMF_alloc,
    594   OMF_copy,
    595   OMF_init,
    596   OMF_mutableCopy,
    597   OMF_new,
    598 
    599   // These families are singletons consisting only of the nullary
    600   // selector with the given name.
    601   OMF_autorelease,
    602   OMF_dealloc,
    603   OMF_finalize,
    604   OMF_release,
    605   OMF_retain,
    606   OMF_retainCount,
    607   OMF_self,
    608   OMF_initialize,
    609 
    610   // performSelector families
    611   OMF_performSelector
    612 };
    613 
    614 /// Enough bits to store any enumerator in ObjCMethodFamily or
    615 /// InvalidObjCMethodFamily.
    616 enum { ObjCMethodFamilyBitWidth = 4 };
    617 
    618 /// \brief An invalid value of ObjCMethodFamily.
    619 enum { InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1 };
    620 
    621 /// \brief A family of Objective-C methods.
    622 ///
    623 /// These are family of methods whose result type is initially 'id', but
    624 /// but are candidate for the result type to be changed to 'instancetype'.
    625 enum ObjCInstanceTypeFamily {
    626   OIT_None,
    627   OIT_Array,
    628   OIT_Dictionary,
    629   OIT_Singleton,
    630   OIT_Init,
    631   OIT_ReturnsSelf
    632 };
    633 
    634 enum ObjCStringFormatFamily {
    635   SFF_None,
    636   SFF_NSString,
    637   SFF_CFString
    638 };
    639 
    640 /// \brief Smart pointer class that efficiently represents Objective-C method
    641 /// names.
    642 ///
    643 /// This class will either point to an IdentifierInfo or a
    644 /// MultiKeywordSelector (which is private). This enables us to optimize
    645 /// selectors that take no arguments and selectors that take 1 argument, which
    646 /// accounts for 78% of all selectors in Cocoa.h.
    647 class Selector {
    648   friend class Diagnostic;
    649 
    650   enum IdentifierInfoFlag {
    651     // Empty selector = 0.
    652     ZeroArg  = 0x1,
    653     OneArg   = 0x2,
    654     MultiArg = 0x3,
    655     ArgFlags = ZeroArg|OneArg
    656   };
    657   uintptr_t InfoPtr; // a pointer to the MultiKeywordSelector or IdentifierInfo.
    658 
    659   Selector(IdentifierInfo *II, unsigned nArgs) {
    660     InfoPtr = reinterpret_cast<uintptr_t>(II);
    661     assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
    662     assert(nArgs < 2 && "nArgs not equal to 0/1");
    663     InfoPtr |= nArgs+1;
    664   }
    665   Selector(MultiKeywordSelector *SI) {
    666     InfoPtr = reinterpret_cast<uintptr_t>(SI);
    667     assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
    668     InfoPtr |= MultiArg;
    669   }
    670 
    671   IdentifierInfo *getAsIdentifierInfo() const {
    672     if (getIdentifierInfoFlag() < MultiArg)
    673       return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags);
    674     return nullptr;
    675   }
    676 
    677   MultiKeywordSelector *getMultiKeywordSelector() const {
    678     return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags);
    679   }
    680 
    681   unsigned getIdentifierInfoFlag() const {
    682     return InfoPtr & ArgFlags;
    683   }
    684 
    685   static ObjCMethodFamily getMethodFamilyImpl(Selector sel);
    686 
    687   static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel);
    688 
    689 public:
    690   friend class SelectorTable; // only the SelectorTable can create these
    691   friend class DeclarationName; // and the AST's DeclarationName.
    692 
    693   /// The default ctor should only be used when creating data structures that
    694   ///  will contain selectors.
    695   Selector() : InfoPtr(0) {}
    696   Selector(uintptr_t V) : InfoPtr(V) {}
    697 
    698   /// operator==/!= - Indicate whether the specified selectors are identical.
    699   bool operator==(Selector RHS) const {
    700     return InfoPtr == RHS.InfoPtr;
    701   }
    702   bool operator!=(Selector RHS) const {
    703     return InfoPtr != RHS.InfoPtr;
    704   }
    705 
    706   void *getAsOpaquePtr() const {
    707     return reinterpret_cast<void*>(InfoPtr);
    708   }
    709 
    710   /// \brief Determine whether this is the empty selector.
    711   bool isNull() const { return InfoPtr == 0; }
    712 
    713   // Predicates to identify the selector type.
    714   bool isKeywordSelector() const {
    715     return getIdentifierInfoFlag() != ZeroArg;
    716   }
    717 
    718   bool isUnarySelector() const {
    719     return getIdentifierInfoFlag() == ZeroArg;
    720   }
    721 
    722   unsigned getNumArgs() const;
    723 
    724   /// \brief Retrieve the identifier at a given position in the selector.
    725   ///
    726   /// Note that the identifier pointer returned may be NULL. Clients that only
    727   /// care about the text of the identifier string, and not the specific,
    728   /// uniqued identifier pointer, should use \c getNameForSlot(), which returns
    729   /// an empty string when the identifier pointer would be NULL.
    730   ///
    731   /// \param argIndex The index for which we want to retrieve the identifier.
    732   /// This index shall be less than \c getNumArgs() unless this is a unary
    733   /// selector (see \c isUnarySelector()), in which case 0 is the only
          /// permissible value.
          ///
          /// NOTE(review): the exception is assumed to be the unary (ZeroArg) case,
          /// since \c isKeywordSelector() is also true for multi-argument selectors,
          /// whose later slots must be addressable. Confirm against the out-of-line
          /// definition.
    734   ///
    735   /// \returns the uniqued identifier for this slot, or NULL if this slot has
    736   /// no corresponding identifier.
    737   IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const;
    738 
    739   /// \brief Retrieve the name at a given position in the selector.
    740   ///
    741   /// \param argIndex The index for which we want to retrieve the name.
    742   /// This index shall be less than \c getNumArgs() unless this is a unary
    743   /// selector (see \c isUnarySelector()), in which case 0 is the only
          /// permissible value.
          ///
          /// NOTE(review): the exception is assumed to be the unary (ZeroArg) case,
          /// since \c isKeywordSelector() is also true for multi-argument selectors,
          /// whose later slots must be addressable. Confirm against the out-of-line
          /// definition.
    744   ///
    745   /// \returns the name for this slot, which may be the empty string if no
    746   /// name was supplied.
    747   StringRef getNameForSlot(unsigned argIndex) const;
    748 
    749   /// \brief Derive the full selector name (e.g. "foo:bar:") and return
    750   /// it as a std::string.
    751   std::string getAsString() const;
    752 
    753   /// \brief Prints the full selector name (e.g. "foo:bar:") to \p OS.
    754   void print(llvm::raw_ostream &OS) const;
    755 
    756   /// \brief Classify this selector into its conventional method family.
    757   ObjCMethodFamily getMethodFamily() const {
            // The static worker does the classification; pass it this selector.
    758     return Selector::getMethodFamilyImpl(*this);
    759   }
    760 
    761   ObjCStringFormatFamily getStringFormatFamily() const {
            // The static worker does the classification; pass it this selector.
    762     return Selector::getStringFormatFamilyImpl(*this);
    763   }
    764 
    765   static Selector getEmptyMarker() {
            // -1 converted to uintptr_t; DenseMapInfo<clang::Selector>::getEmptyKey()
            // returns this value.
    766     return Selector(static_cast<uintptr_t>(-1));
    767   }
    768 
    769   static Selector getTombstoneMarker() {
            // -2 converted to uintptr_t; DenseMapInfo<clang::Selector>::getTombstoneKey()
            // returns this value.
    770     return Selector(static_cast<uintptr_t>(-2));
    771   }
    772 
    773   static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel); // static; takes the selector explicitly, only declared here
    774 };
    775 
    776 /// \brief This table allows us to fully hide how we implement
    777 /// multi-keyword caching.
    778 class SelectorTable {
    779   void *Impl;  // Actually a SelectorTableImpl
    780 
    781 public:
    782   SelectorTable();
          // Copy construction and copy assignment are deleted: tables cannot be copied.
    783   SelectorTable(const SelectorTable &) = delete;
    784   SelectorTable &operator=(const SelectorTable &) = delete;
    785   ~SelectorTable();
    786 
    787   /// \brief Can create any sort of selector.
    788   ///
    789   /// \p NumArgs indicates whether this is a no argument selector "foo", a
    790   /// single argument selector "foo:" or multi-argument "foo:bar:".
          /// \p IIV presumably points at the identifiers that make up the selector's
          /// slots (TODO confirm against the out-of-line definition).
    791   Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV);
    792 
          /// \brief Build a selector directly from \p ID, passing 1 as the second
          /// constructor argument (presumably the argument count; confirm against the
          /// Selector constructor).
          ///
          /// NOTE(review): "unary" here goes with the value 1, whereas
          /// Selector::isUnarySelector() tests for ZeroArg; the two uses of the word
          /// appear to differ. Verify before relying on either name.
    793   Selector getUnarySelector(IdentifierInfo *ID) {
    794     return Selector(ID, 1);
    795   }
          /// \brief Build a selector directly from \p ID, passing 0 as the second
          /// constructor argument.
    796   Selector getNullarySelector(IdentifierInfo *ID) {
    797     return Selector(ID, 0);
    798   }
    799 
    800   /// \brief Return the total amount of memory allocated for managing selectors.
    801   size_t getTotalMemory() const;
    802 
    803   /// \brief Return the default setter name for the given identifier.
    804   ///
    805   /// This is "set" + \p Name where the initial character of \p Name
    806   /// has been capitalized.
    807   static SmallString<64> constructSetterName(StringRef Name);
    808 
    809   /// \brief Return the default setter selector for the given identifier.
    810   ///
    811   /// This is "set" + \p Name where the initial character of \p Name
    812   /// has been capitalized.
    813   static Selector constructSetterSelector(IdentifierTable &Idents,
    814                                           SelectorTable &SelTable,
    815                                           const IdentifierInfo *Name);
    816 };
    817 
    818 /// DeclarationNameExtra - Common base of the MultiKeywordSelector,
    819 /// CXXSpecialName, and CXXOperatorIdName classes, all of which are
    820 /// private classes that describe different kinds of names.
    821 class DeclarationNameExtra {
    822 public:
    823   /// ExtraKind - The kind of "extra" information stored in the
    824   /// DeclarationName. See @c ExtraKindOrNumArgs for an explanation of
    825   /// how these enumerator values are used.
    826   enum ExtraKind {
    827     CXXConstructor = 0,
    828     CXXDestructor,
    829     CXXConversionFunction,
            // One CXXOperator<Name> enumerator is generated for each
            // OVERLOADED_OPERATOR entry expanded from OperatorKinds.def.
    830 #define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
    831     CXXOperator##Name,
    832 #include "clang/Basic/OperatorKinds.def"
    833     CXXDeductionGuide,
    834     CXXLiteralOperator,
    835     CXXUsingDirective,
            // Last enumerator; also the base that selector argument counts are added
            // to in ExtraKindOrNumArgs.
    836     NUM_EXTRA_KINDS
    837   };
    838 
    839   /// ExtraKindOrNumArgs - Either the kind of C++ special name or
    840   /// operator-id (if the value is one of the CXX* enumerators of
    841   /// ExtraKind), in which case the DeclarationNameExtra is also a
    842   /// CXXSpecialName (for CXXConstructor, CXXDestructor, or
    843   /// CXXConversionFunction), a CXXOperatorIdName, or a CXXLiteralOperatorName.
    844   /// It may also be the name common to C++ using-directives (CXXUsingDirective).
    845   /// Otherwise it is NUM_EXTRA_KINDS+NumArgs, where NumArgs is the number of
    846   /// arguments in the Objective-C selector, in which case the
    847   /// DeclarationNameExtra is also a MultiKeywordSelector.
    848   unsigned ExtraKindOrNumArgs;
    849 };
    850 
    851 }  // end namespace clang
    852 
    853 namespace llvm {
    854 
    855 /// Specialize DenseMapInfo for clang::Selector so that selectors can be used
    856 /// as keys in DenseMap and DenseSet.
    857 template <>
    858 struct DenseMapInfo<clang::Selector> {
          // The empty and tombstone keys are the two marker selectors that
          // clang::Selector itself provides.
    859   static clang::Selector getEmptyKey() {
    860     return clang::Selector::getEmptyMarker();
    861   }
    862 
    863   static clang::Selector getTombstoneKey() {
    864     return clang::Selector::getTombstoneMarker();
    865   }
    866 
          // Only declared here.
    867   static unsigned getHashValue(clang::Selector S);
    868 
          // Key equality is selector equality.
    869   static bool isEqual(clang::Selector LHS, clang::Selector RHS) {
    870     return !(LHS != RHS);
    871   }
    872 };
    873 
    874 template <>
    875 struct isPodLike<clang::Selector> {
          // clang::Selector is declared POD-like.
          static const bool value = true;
        };
    876 
    877 template <typename T> struct PointerLikeTypeTraits; // declaration only; specialized below
    878 
    879 template<>
    880 struct PointerLikeTypeTraits<clang::Selector> {
          // Selector -> opaque pointer, via the selector's own accessor.
    881   static const void *getAsVoidPointer(clang::Selector P) {
    882     return P.getAsOpaquePtr();
    883   }
    884 
          // Opaque pointer -> Selector, rebuilt from the pointer's integer value.
    885   static clang::Selector getFromVoidPointer(const void *P) {
    886     const uintptr_t Bits = reinterpret_cast<uintptr_t>(P);
            return clang::Selector(Bits);
    887   }
    888 
    889   enum { NumLowBitsAvailable = 0 };
    890 };
    891 
    892 // PointerLikeTypeTraits for IdentifierInfo pointers, which are not
    893 // guaranteed to be 8-byte aligned.
    894 template<>
    895 struct PointerLikeTypeTraits<clang::IdentifierInfo*> {
    896   static void *getAsVoidPointer(clang::IdentifierInfo *P) {
    897     void *Raw = P;
            return Raw;
    898   }
    899 
    900   static clang::IdentifierInfo *getFromVoidPointer(void *P) {
    901     clang::IdentifierInfo *II = static_cast<clang::IdentifierInfo*>(P);
            return II;
    902   }
    903 
    904   enum { NumLowBitsAvailable = 1 };
    905 };
    906 
    907 template<>
    908 struct PointerLikeTypeTraits<const clang::IdentifierInfo*> {
          // Const counterpart of the IdentifierInfo* traits: the same conversions,
          // carried out through const void*.
    909   static const void *getAsVoidPointer(const clang::IdentifierInfo *P) {
    910     const void *Raw = P;
            return Raw;
    911   }
    912 
    913   static const clang::IdentifierInfo *getFromVoidPointer(const void *P) {
    914     const clang::IdentifierInfo *II = static_cast<const clang::IdentifierInfo*>(P);
            return II;
    915   }
    916 
    917   enum { NumLowBitsAvailable = 1 };
    918 };
    919 
    920 } // end namespace llvm
    921 
    922 #endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
    923